##### Import relevant packages

In [3]:
import pandas as pd
import matplotlib

##### Reference the SimilarWeb .csv files and read them into DataFrames (TODO: make this more programmatic)

In [4]:
# Monthly SimilarWeb exports, given as paths relative to the working directory.
feb_data_file, mar_data_file = 'similarweb_feb.csv', 'similarweb_march.csv'

In [5]:
# Load each month's export into its own DataFrame with pandas' default CSV parsing.
df_feb = pd.read_csv(filepath_or_buffer=feb_data_file)
df_mar = pd.read_csv(filepath_or_buffer=mar_data_file)

##### Preview the top 5 rows of the February DataFrame

In [6]:
# Display the first five rows (head's default count) as the cell output.
df_feb.head(n=5)

Unnamed: 0,Account Name,Name,Average Visit Duration,Website Category,2nd Traffic Country,Bounce Rate,Category Rank,Desktop Visits Share,Direct Visits Share,Display Ads Visits Share,...,Mobile Web Visits Share,Monthly Unique Visitors,Pages Per Visit,Paid Search Visits Share,Total Monthly Visits,Total Visits MoM Growth,Top Traffic Country,Social Visits Share,Account ID,Snapshot Date
0,Meez Culinary Solutions LLC,getmeez.com,,,,,,0.0,,,...,0.0,0,,,,-100.0,,,0012K00001fHHQN,2/1/20
1,PolyOne,polyone.com,0:04:07,Heavy Industry and Engineering/Chemical Industry,Canada,40.55,219.0,86.95,45.75,,...,13.05,26475,4.45,2.48,64385.0,-18.36,United States,1.27,0012K00001ZKgZV,2/1/20
2,Town Boys and Girls Club,bgcnt.net,,,,,,0.0,,,...,0.0,0,,,,-100.0,,,0016A00000n9ySf,2/1/20
3,Key Packaging,keypackaging.com,,,,,,0.0,,,...,0.0,0,,,,-100.0,,,0016A00000n9vCp,2/1/20
4,Piedmont Rural Telephone Cooperative Inc,prtcnet.com,0:06:09,Health/Health,,28.19,9777.0,90.92,86.11,,...,9.08,8408,3.43,,56630.0,-2.7,United States,0.04,0016A00000n9wkV,2/1/20


##### Reorder columns into a more logical sequence (optional)

In [7]:
# Desired column order for both monthly frames. Selecting with this list also
# drops every column that is not named here.
# NOTE(review): 'Mobile Web  Visits Share' contains two consecutive spaces;
# presumably this matches the header in the SimilarWeb export -- confirm before
# "fixing" it, because a mismatched label raises a KeyError in the selection below.
columns_ordered = [
    'Account ID', 'Account Name', 'Name', 'Domain',
    'Website Category', 'Category Rank', 'Global Rank',
    'Top Traffic Country', '2nd Traffic Country', 'Has Data',
    'Total Monthly Visits', 'Monthly Unique Visitors',
    'Bounce Rate', 'Pages Per Visit', 'Average Visit Duration',
    'Total Visits MoM Growth',
    'Desktop Visits Share', 'Mobile Web  Visits Share',
    'Direct Visits Share', 'Display Ads Visits Share', 'Mail Visits Share',
    'Paid Search Visits Share', 'Social Visits Share',
    'Snapshot Date',
]

# .copy() makes each reordered frame an independent object, so later cells can
# add or overwrite columns on it without pandas' SettingWithCopyWarning.
df_feb = df_feb[columns_ordered].copy()
df_mar = df_mar[columns_ordered].copy()

##### Fill all #NAs with 0s for Monthly Visits and Paid Search/Display Ads columns

In [8]:
# Columns whose missing values are replaced by 0; every other column keeps its NaNs.
zero_fill = {
    'Total Monthly Visits': 0,
    'Paid Search Visits Share': 0,
    'Display Ads Visits Share': 0,
}

# Fill at the frame level and rebind the name. The previous form,
# df[col].fillna(0, inplace=True), mutates an intermediate Series: pandas
# deprecates that pattern and, with Copy-on-Write enabled, it leaves the
# parent DataFrame unchanged.
df_feb = df_feb.fillna(value=zero_fill)
df_mar = df_mar.fillna(value=zero_fill)

##### Compute Total Ad Spend Visits Share (Paid Search + Display Ads) then calculate # of visits from Total Monthly Visits

In [9]:
def _add_ad_spend_columns(frame):
    """Return a new frame with the two derived ad-spend columns added.

    Adds (or overwrites):
      * 'Total Ad Spend Visits Share' = 'Paid Search Visits Share'
        + 'Display Ads Visits Share'
      * 'Total Ad Spend Visits' = 'Total Monthly Visits' * that share / 100

    The input frame is not modified. A KeyError is raised if any of the three
    source columns is missing.
    """
    ad_share = frame['Paid Search Visits Share'] + frame['Display Ads Visits Share']
    # Dividing by 100 treats the share as a percentage; the February preview
    # shows values such as 2.48 and 86.95 -- TODO confirm the unit with the data source.
    return frame.assign(**{
        'Total Ad Spend Visits Share': ad_share,
        'Total Ad Spend Visits': frame['Total Monthly Visits'] * ad_share / 100,
    })


# Same derivation for both months; rebinding keeps re-runs of this cell idempotent.
df_feb = _add_ad_spend_columns(df_feb)
df_mar = _add_ad_spend_columns(df_mar)

##### Slim down each DataFrame to only the columns needed for the .csv output

In [10]:
# Columns kept for the month-over-month comparison; the same set for both months.
slim_columns = [
    'Account ID', 'Account Name', 'Domain', 'Website Category',
    'Total Monthly Visits', 'Total Visits MoM Growth',
    'Total Ad Spend Visits Share', 'Total Ad Spend Visits',
]

df_feb_slim = df_feb[slim_columns]
df_mar_slim = df_mar[slim_columns]

##### Left join March with February, add suffixes to the overlapping column names, and drop the redundant columns

In [11]:
# Descriptive columns that the join duplicates from the February side.
redundant_feb_columns = ['Account Name_feb', 'Domain_feb', 'Website Category_feb']

# March is the left side of a left join on 'Account ID', so every March row is
# kept; overlapping non-key columns get '_march' / '_feb' suffixes.
merged = (
    df_mar_slim
    .merge(df_feb_slim, how='left', on=['Account ID'], suffixes=('_march', '_feb'))
    .drop(columns=redundant_feb_columns)
)

##### Export the merged DataFrame to a .csv file in the local repository

In [12]:
# Written relative to the current working directory. to_csv's defaults apply,
# so the row index is emitted as the first, unnamed column of the file.
export_path = 'feb_march_similarweb.csv'
merged.to_csv(export_path)

##### Other scratchwork below...

In [13]:
# Show the merged frame's column labels (cell output) to check which columns
# received the '_march' / '_feb' suffixes.
merged.columns

Index(['Account ID', 'Account Name_march', 'Domain_march',
       'Website Category_march', 'Total Monthly Visits_march',
       'Total Visits MoM Growth_march', 'Total Ad Spend Visits Share_march',
       'Total Ad Spend Visits_march', 'Total Monthly Visits_feb',
       'Total Visits MoM Growth_feb', 'Total Ad Spend Visits Share_feb',
       'Total Ad Spend Visits_feb'],
      dtype='object')

In [16]:
# Month-over-month change, computed as March minus February. Accounts with no
# February match in the left join carry NaN on the February side, so their
# deltas are NaN as well.
merged['Total Visits Delta'] = merged['Total Monthly Visits_march'].sub(
    merged['Total Monthly Visits_feb']
)
merged['Total Ad Visits Delta'] = merged['Total Ad Spend Visits_march'].sub(
    merged['Total Ad Spend Visits_feb']
)

In [17]:
# Split 'Vertical/Industry' category strings into two columns. n=1 splits on the
# first '/' only, so the result never has more than two columns; an unbounded
# split would produce three or more columns as soon as one category contained
# a second '/', and the two-column assignment below would then raise ValueError.
# Rows with a missing category get NaN in both new columns.
category_parts = merged['Website Category_march'].str.split('/', n=1, expand=True)
merged[['Website Vertical', 'Website Industry']] = category_parts

Unnamed: 0,Account ID,Account Name_march,Domain_march,Website Category_march,Total Monthly Visits_march,Total Visits MoM Growth_march,Total Ad Spend Visits Share_march,Total Ad Spend Visits_march,Total Monthly Visits_feb,Total Visits MoM Growth_feb,Total Ad Spend Visits Share_feb,Total Ad Spend Visits_feb,Total Visits Delta,Total Ad Visits Delta
0,0012K00001fHHQN,Meez Culinary Solutions LLC,getmeez.com,,71.0,,0.0,0.0,0.0,-100.0,0.0,0.0,71.0,0.0
1,0012K00001WDB05,LuxAddiction.com,luxaddiction.com,Lifestyle/Beauty and Cosmetics,11686.0,2.92,0.0,0.0,11355.0,69.69,0.0,0.0,331.0,0.0
2,0016A00000n9y7f,Tecnocap LLC,tecnocapclosures.com,Business and Consumer Services/Textiles,772.0,-51.95,0.0,0.0,1607.0,8.84,0.0,0.0,-835.0,0.0
3,0016A00000n9vO3,Lertek,lertek.com,,787.0,1.14,0.0,0.0,778.0,-29.29,0.0,0.0,9.0,0.0
4,0016A00000n9xB2,Regency Real Estate Brokers Inc,regencyrealestate.com,Business and Consumer Services/Real Estate,4069.0,108.4,0.0,0.0,1952.0,35.72,0.0,0.0,2117.0,0.0


(17468, 14)