In [1]:
import pandas as pd
import numpy as np

# Cleaning and Formatting FB Likes and IG Followers

In [2]:
# Load the raw likes/followers export. The sheet's first row is used as the
# header, so the real column names ('Date', ...) arrive as data row 0.
df = pd.read_excel('meta_likes_follows.xlsx', header=0)
df.head()

Unnamed: 0,Facebook Page new likes,Unnamed: 1
0,Date,New Facebook Page likes
1,2021-07-01T00:00:00,0
2,2021-07-02T00:00:00,0
3,2021-07-03T00:00:00,1
4,2021-07-04T00:00:00,0


In [3]:
# Promote the first data row (the real column names) to the header.
df.columns = df.iloc[0].tolist()

In [4]:
# The header row now lives in the column labels; remove it from the data.
df = df.drop(index=0)

In [5]:
# Index label of the first completely empty row, which marks where the
# Facebook data stops.
limit = df.index[df['Date'].isna() & df['New Facebook Page likes'].isna()][0]

In [6]:
# `limit` is an index label, and labels start at 1 once row 0 was dropped, so
# the blank row sits at position limit - 1; keep every row before it.
df_fb = df.iloc[:limit - 1]

In [7]:
# Position limit + 1 corresponds to label limit + 2, so the slice starts on
# the row that holds the Instagram column names.
df_ig = df.iloc[limit + 1:]

df_ig.head()

Unnamed: 0,Date,New Facebook Page likes
487,Date,New Instagram followers
488,2022-03-10T00:00:00,2
489,2022-03-17T00:00:00,1
490,2022-03-18T00:00:00,2
491,2022-03-19T00:00:00,1


In [8]:
# The row labelled limit + 2 holds the Instagram column names; use it as the header.
df_ig.columns = df_ig.loc[limit + 2].tolist()

In [9]:
# Remove the embedded header row (label limit + 2) and renumber from 0.
# drop() and reset_index() return new frames rather than modifying df_ig, so
# the result must be assigned back; otherwise df_ig keeps the header row as
# a data row and carries it into the merge below.
df_ig = df_ig.drop(index=limit + 2).reset_index(drop=True)
df_ig

Unnamed: 0,Date,New Instagram followers
0,2022-03-10T00:00:00,2
1,2022-03-17T00:00:00,1
2,2022-03-18T00:00:00,2
3,2022-03-19T00:00:00,1
4,2022-03-22T00:00:00,2
...,...,...
82,2022-10-07T00:00:00,1
83,2022-10-10T00:00:00,1
84,2022-10-11T00:00:00,1
85,2022-10-17T00:00:00,2


In [10]:
# Keep every Facebook date and attach the Instagram followers wherever the
# same date also appears in the Instagram block.
df_merged = df_fb.merge(df_ig, how='left', on='Date')

In [11]:
# Dates with no matching Instagram row come out of the left merge as NaN;
# record them as 0 new followers.
df_merged['New Instagram followers'] = df_merged['New Instagram followers'].fillna(0)

# Cleaning and Formatting FB page and IG profile visits

In [12]:
# Load the raw page/profile visits export. As with the likes sheet, the real
# column names arrive as data row 0.
df = pd.read_excel('meta_page_profile.xlsx', header=0)
df

Unnamed: 0,Facebook Page visits,Unnamed: 1
0,Date,Facebook Page likes
1,2021-07-01T00:00:00,11
2,2021-07-02T00:00:00,3
3,2021-07-03T00:00:00,1
4,2021-07-04T00:00:00,5
...,...,...
967,2022-10-23T00:00:00,2
968,2022-10-24T00:00:00,3
969,2022-10-25T00:00:00,8
970,2022-10-26T00:00:00,3


In [13]:
# Name the columns explicitly: the sheet's embedded header row labels the
# value column 'Facebook Page likes' although this export is about visits.
df.columns = ['Date', 'Facebook Page visits']

In [14]:
# Remove the embedded header row. reset_index() without drop=True renumbers
# the rows from 0 and keeps the old labels in a new 'index' column, which the
# later slicing cells drop again.
df = df.drop(index=0).reset_index()

In [15]:
# Row number of the first completely empty row, which marks where the
# Facebook data stops.
limit2 = df.index[df['Date'].isna() & df['Facebook Page visits'].isna()][0]

In [16]:
# Rows before the blank separator are the Facebook data; the leftover
# 'index' column from reset_index() is discarded.
df_fb2 = df.iloc[:limit2].drop(columns='index').reset_index(drop=True)
df_fb2

Unnamed: 0,Date,Facebook Page visits
0,2021-07-01T00:00:00,11
1,2021-07-02T00:00:00,3
2,2021-07-03T00:00:00,1
3,2021-07-04T00:00:00,5
4,2021-07-05T00:00:00,3
...,...,...
479,2022-10-23T00:00:00,3
480,2022-10-24T00:00:00,0
481,2022-10-25T00:00:00,1
482,2022-10-26T00:00:00,2


In [17]:
# NOTE(review): the +3 presumably skips the blank row, the section title and
# the Instagram header row - confirm against the sheet layout.
df_ig2 = df.iloc[limit2 + 3:].drop(columns='index').reset_index(drop=True)

In [18]:
# The slice still carries the Facebook column names; relabel the value column.
df_ig2.columns = ['Date', 'Instagram Profile Visits']

In [19]:
# Outer join, so a date present in only one of the two blocks is still kept.
df_merged2 = df_fb2.merge(df_ig2, how='outer', on='Date')

# Cleaning and Formatting FB and IG Reach

In [20]:
# Load the raw reach export.
df = pd.read_excel('meta_reach.xlsx')

In [21]:
# Use the first data row as the header. Assigning the row Series directly
# also carries its name (0) over to the columns Index.
df.columns = df.iloc[0]

In [22]:
# The header row now lives in the column labels; remove it from the data.
df = df.drop(index=0)

In [23]:
# Check which column names the promoted header row produced.
df.columns

Index(['Date', 'Facebook Page reach'], dtype='object', name=0)

In [24]:
# Index label of the first completely empty row in the reach sheet.
is_blank = df['Date'].isnull() & df['Facebook Page reach'].isnull()
limit3 = df.index[is_blank][0]

In [25]:
# `limit3` is an index label, and labels start at 1 once row 0 was dropped,
# so the blank row sits at position limit3 - 1; keep every row before it.
df_fb3 = df.iloc[:limit3 - 1]

In [26]:
# Slice relative to this sheet's own blank separator row (limit3). Using
# `limit`, which was computed from the likes/followers sheet, only lands on
# the right row if both sheets happen to share exactly the same layout.
# NOTE(review): the +2 presumably skips the section title and header rows
# that follow the blank row - confirm against the sheet layout.
df_ig3 = df[limit3 + 2:].reset_index(drop=True)

In [27]:
# The slice still carries the Facebook column names; relabel the value column.
df_ig3.columns = ['Date', 'Instagram reach']

In [28]:
# Outer join, so a date present in only one of the two blocks is still kept.
df_merged3 = df_fb3.merge(df_ig3, how='outer', on='Date')

# Merge the three datasets

In [29]:
# Inspect the likes/followers table before combining.
df_merged

Unnamed: 0,Date,New Facebook Page likes,New Instagram followers
0,2021-07-01T00:00:00,0,0
1,2021-07-02T00:00:00,0,0
2,2021-07-03T00:00:00,1,0
3,2021-07-04T00:00:00,0,0
4,2021-07-05T00:00:00,0,0
...,...,...,...
479,2022-10-23T00:00:00,0,0
480,2022-10-24T00:00:00,0,0
481,2022-10-25T00:00:00,0,1
482,2022-10-26T00:00:00,0,0


In [30]:
# Inspect the page/profile visits table before combining.
df_merged2

Unnamed: 0,Date,Facebook Page visits,Instagram Profile Visits
0,2021-07-01T00:00:00,11,11
1,2021-07-02T00:00:00,3,15
2,2021-07-03T00:00:00,1,0
3,2021-07-04T00:00:00,5,6
4,2021-07-05T00:00:00,3,10
...,...,...,...
479,2022-10-23T00:00:00,3,2
480,2022-10-24T00:00:00,0,3
481,2022-10-25T00:00:00,1,8
482,2022-10-26T00:00:00,2,3


In [31]:
# Inspect the reach table before combining.
df_merged3

Unnamed: 0,Date,Facebook Page reach,Instagram reach
0,2021-07-01T00:00:00,132,122
1,2021-07-02T00:00:00,178,224
2,2021-07-03T00:00:00,322,177
3,2021-07-04T00:00:00,189,106
4,2021-07-05T00:00:00,167,122
...,...,...,...
479,2022-10-23T00:00:00,25,25
480,2022-10-24T00:00:00,27,16
481,2022-10-25T00:00:00,42,54
482,2022-10-26T00:00:00,30,102


In [32]:
# No key is passed, so pandas joins on the columns the two frames share;
# the join type is the default inner join, spelled out here.
df_mega_merge = df_merged.merge(df_merged2, how='inner')

In [33]:
# Add the reach columns, again joining on the shared columns with the
# default inner join spelled out.
df_mega_merge = df_mega_merge.merge(df_merged3, how='inner')

In [34]:
# Write the combined table to a workbook, leaving out the row index.
df_mega_merge.to_excel('meta_toCopy.xlsx', index=False)