In [None]:
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import PassiveAggressiveRegressor
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator

In [None]:
# Use seaborn's 'darkgrid' style for the matplotlib figures drawn below.
sns.set_style('darkgrid')

In [None]:
warnings.filterwarnings(action='ignore')

In [None]:
# Read the CSV from the relative data/ directory, decoding it as latin1,
# then preview the first five rows.
df = pd.read_csv(
    filepath_or_buffer='data/instagram_data.csv',
    encoding='latin1',
)
df.head(n=5)

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Drop every row that contains at least one missing value.
# Rebinding the name (instead of inplace=True) is the recommended pandas idiom:
# it keeps the step chainable and avoids mutating the frame that earlier cells displayed.
df = df.dropna()

In [None]:
# Print the frame's summary (index range, column dtypes, non-null counts, memory use)
# so the result of the preceding dropna step can be checked.
df.info()

In [None]:
# Histogram with a KDE overlay for the 'From Home' column.
# sns.distplot is deprecated and scheduled for removal from seaborn; histplot with
# kde=True and stat='density' is its documented replacement and keeps the y-axis
# on a density scale, as distplot did.
plt.figure(figsize=(12, 6))
sns.histplot(data=df, x='From Home', kde=True, stat='density')
plt.title('Distribution of Impressions From Home')
plt.show()

In [None]:
# Histogram with a KDE overlay for the 'From Hashtags' column.
# sns.distplot is deprecated and scheduled for removal from seaborn; histplot with
# kde=True and stat='density' is its documented replacement and keeps the y-axis
# on a density scale, as distplot did.
plt.figure(figsize=(12, 6))
sns.histplot(data=df, x='From Hashtags', kde=True, stat='density')
plt.title('Distribution of Impressions From Hashtags')
plt.show()

In [None]:
# Histogram with a KDE overlay for the 'From Explore' column.
# sns.distplot is deprecated and scheduled for removal from seaborn; histplot with
# kde=True and stat='density' is its documented replacement and keeps the y-axis
# on a density scale, as distplot did.
plt.figure(figsize=(12, 6))
sns.histplot(data=df, x='From Explore', kde=True, stat='density')
plt.title('Distribution of Impressions From Explore')
plt.show()

In [None]:
# Column totals for the four impression-source columns, unpacked in the same
# order as the slice labels below.
home, hashtags, explore, other = (
    df[column].sum()
    for column in ("From Home", "From Hashtags", "From Explore", "From Other")
)

labels = ['From Home', 'From Hashtags', 'From Explore', 'Other']
values = [home, hashtags, explore, other]

# Donut chart (hole=0.5) with one slice per source total.
# NOTE(review): names/values are standalone lists rather than columns of df;
# data_frame is still passed exactly as in the original call.
fig = px.pie(
    df,
    names=labels,
    values=values,
    hole=0.5,
    title='Impressions on Instagram Posts From Various Sources',
    width=800, height=400,
)
fig.show()

In [None]:
# Concatenate every entry of the 'Caption' column into one space-separated string
# and build a word cloud from it, filtering out wordcloud's bundled STOPWORDS.
text = " ".join(df['Caption'])
stopwords = set(STOPWORDS)
wordcloud = WordCloud(stopwords=stopwords).generate(text)

# Show the rendered cloud as an image without axis ticks or frame.
plt.figure(figsize=(12, 6))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()

In [None]:
# Concatenate every entry of the 'Hashtags' column into one space-separated string
# and build a word cloud from it, filtering out wordcloud's bundled STOPWORDS.
text = " ".join(df['Hashtags'])
stopwords = set(STOPWORDS)
wordcloud = WordCloud(stopwords=stopwords).generate(text)

# Show the rendered cloud as an image without axis ticks or frame.
plt.figure(figsize=(12, 6))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()

In [None]:
# Likes plotted against Impressions; marker size also encodes Likes and an
# ordinary-least-squares trendline is overlaid.
fig = px.scatter(
    df,
    x='Impressions', y='Likes',
    size='Likes',
    trendline='ols',
    title='Relationship Between Likes and Impressions',
    width=800, height=400,
)
fig.show()

In [None]:
# Comments plotted against Impressions; marker size also encodes Comments and an
# ordinary-least-squares trendline is overlaid.
fig = px.scatter(
    df,
    x='Impressions', y='Comments',
    size='Comments',
    trendline='ols',
    title='Relationship Between Comments and Impressions',
    width=800, height=400,
)
fig.show()

In [None]:
# Shares plotted against Impressions; marker size also encodes Shares and an
# ordinary-least-squares trendline is overlaid.
fig = px.scatter(
    df,
    x='Impressions', y='Shares',
    size='Shares',
    trendline='ols',
    title='Relationship Between Shares and Impressions',
    width=800, height=400,
)
fig.show()

In [None]:
# Saves plotted against Impressions; marker size also encodes Saves and an
# ordinary-least-squares trendline is overlaid.
fig = px.scatter(
    df,
    x='Impressions', y='Saves',
    size='Saves',
    trendline='ols',
    title='Relationship Between Post Saves and Impressions',
    width=800, height=400,
)
fig.show()

In [None]:
# Pairwise Pearson correlation (the pandas default) over the numeric columns only,
# then the 'Impressions' column of that matrix ranked from strongest positive down.
correlation = df.corr(method='pearson', numeric_only=True)
correlation.loc[:, "Impressions"].sort_values(ascending=False)

In [None]:
# Conversion rate = total follows as a percentage of total profile visits.
follows_total = df["Follows"].sum()
visits_total = df["Profile Visits"].sum()
conversion_rate = (follows_total / visits_total) * 100
print(conversion_rate)

In [None]:
# Follows plotted against Profile Visits; marker size also encodes Follows and an
# ordinary-least-squares trendline is overlaid.
fig = px.scatter(
    df,
    x='Profile Visits', y='Follows',
    size='Follows',
    trendline='ols',
    title='Relationship Between Profile Visits and Followers Gained',
    width=800, height=400,
)
fig.show()

In [None]:
# Predictor columns, in the order the model will expect them at predict time.
feature_columns = ['Likes', 'Saves', 'Comments',
                   'Shares', 'Profile Visits', 'Follows']

# Feature matrix and target vector ('Impressions') as plain NumPy arrays.
X = np.array(df[feature_columns])
y = np.array(df["Impressions"])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=7
)

In [None]:
# Fit a passive-aggressive regressor on the training split and report its score
# (R^2 for scikit-learn regressors) on the held-out split.
# The estimator shuffles the training data between epochs, so without a fixed
# random_state the fitted weights and the reported score change from run to run.
# The seed value 7 mirrors the one used for train_test_split.
model = PassiveAggressiveRegressor(random_state=7)
model.fit(X_train, y_train)
model.score(X_test, y_test)

In [None]:
# Predict impressions for one hypothetical post. The values follow the column order:
# ['Likes', 'Saves', 'Comments', 'Shares', 'Profile Visits', 'Follows']
# reshape(1, -1) turns the flat vector into a single-row 2-D array for predict().
features = np.array([282.0, 233.0, 4.0, 9.0, 165.0, 54.0]).reshape(1, -1)
model.predict(features)