# Time Series Analysis

Aim: Use the time series to find insights into, for example:
- engagement rate (per post and follower)
- follower gain

In [None]:
import matplotlib.pyplot as plt
import scipy
from scipy.signal import find_peaks

In [None]:
# Build the working DataFrame with preprocess_data (function from Dave's preprocesing.ipynb).
# NOTE(review): neither `df` nor `preprocess_data` is defined in the visible part of
# this notebook — presumably both come from running the preprocessing notebook first; confirm.
df_new = preprocess_data(df)

---------
## Number of Followers

In [None]:
# Parse the 'date' column with pd.to_datetime before grouping and plotting on it.
df_new['date'] = pd.to_datetime(df_new['date'])

# Per-brand view of the data; one curve is drawn for each group below.
grouped = df_new.groupby('brand')

# Draw the follower count over time, one line per brand, on the current axes.
ax = plt.gca()
for brand_name, brand_rows in grouped:
    ax.plot(brand_rows['date'], brand_rows['followers'], label=brand_name)

# Axis labels and title.
ax.set_xlabel('Week')
ax.set_ylabel('Number of Followers [Normalized]')
ax.set_title('Followers Time Series by Brand')
# plt.legend()  # legend left disabled, as in the original cell
plt.show()

In [None]:
# Most recent observation date of every brand, as a (brand, date) table.
latest_per_brand = df_new.groupby('brand')['date'].max()
last_dates = latest_per_brand.reset_index()

# Inner-join back onto the full data to recover the complete row(s)
# recorded for each brand on its latest date.
last_rows = last_dates.merge(df_new, on=['brand', 'date'], how='inner')

# Rank brands by follower count on that latest date, largest first.
ranked_brands = last_rows.sort_values('followers', ascending=False)

# Show the top 10 rows of the ranking.
print(ranked_brands.head(10))

------
## Engagement Rate (per Post and Follower)

In [None]:
# Draw the engagement rate per post over time, one line per brand
# (uses the `grouped` groupby object created in the followers cell).
ax = plt.gca()
for brand_name, brand_rows in grouped:
    ax.plot(brand_rows['date'], brand_rows['engagement_rate_per_post'], label=brand_name)

# Axis labels and title.
ax.set_xlabel('Week')
ax.set_ylabel('Engagement Rate per Post and per Follower')
ax.set_title('Engagement Rate Time Series by Brand')
# plt.legend()  # legend left disabled, as in the original cell
plt.show()

In [None]:
# Most recent observation date of every brand, as a (brand, date) table.
latest_per_brand = df_new.groupby('brand')['date'].max()
last_dates = latest_per_brand.reset_index()

# Inner-join back onto the full data to recover the complete row(s)
# recorded for each brand on its latest date.
last_rows = last_dates.merge(df_new, on=['brand', 'date'], how='inner')

# Rank brands by engagement rate per post on that latest date, largest first.
ranked_brands = last_rows.sort_values('engagement_rate_per_post', ascending=False)

# Show the top 10 rows of the ranking.
print(ranked_brands.head(10))

In [None]:
# Detect and plot prominent engagement-rate peaks for a single brand.
# The brand name is kept in one variable so the filter and the plot title
# cannot drift apart.
brand_name = 'In-N-Out Burger'
brand_rows = df_new[df_new['brand'] == brand_name]
time_series = brand_rows['engagement_rate_per_post']
# NOTE(review): find_peaks compares neighbouring samples, so this assumes the
# brand's rows are already in chronological order — confirm.

# A peak must stand out from its surroundings (prominence) by at least
# 0.3 times the maximum value of the series.
threshold = 0.3 * time_series.max()

# height=0 additionally requires each peak value to be at least 0.
peaks, _ = find_peaks(time_series, height=0, prominence=threshold)

# Plot against the 'date' column rather than the DataFrame row index, so the
# x-axis matches its 'Week' label and the other time-series plots.
dates = brand_rows['date'].values
rates = time_series.values
plt.plot(dates, rates, label='Time Series')

# Mark the detected peaks (find_peaks returns positional indices).
plt.plot(dates[peaks], rates[peaks], 'ro', markersize=5, label='Peaks')

# Add labels and legend
plt.xlabel('Week')
plt.ylabel('Engagement Rate')
plt.title('Engagement Rate Time Series with Peaks of ' + brand_name)
plt.legend()
plt.show()