# Families in Schools
Facebook Insights Script Goals:
- import data
- extract useful data
- format a new df
- export cleaned df to append to Google Sheets 

In [1]:
import pandas as pd
import numpy as np

# Extracting from Sheet 1
- Post ID
- Permalink
- Post Message
- Type
- Post Date
- Post Time
- Total Reach
- Organic Reach
- Total Impressions
- Organic Impressions

In [2]:
# Load the first worksheet (sheet index 0) of the Facebook Insights workbook.
# NOTE(review): the path is absolute and user-specific, so this cell only runs
# on a machine that has the file at exactly this location.

fb_1_workbook = '/Users/jairusmartinez/Desktop/fb_insights_data.xlsx'
fb_1 = pd.read_excel(fb_1_workbook, sheet_name=0)

In [3]:
# Keep only the columns needed from sheet 1; every other column is discarded.

sheet1_keep = [
    'Post ID',
    'Permalink',
    'Post Message',
    'Type',
    'Posted',
    'Lifetime Post Total Reach',
    'Lifetime Post organic reach',
    'Lifetime Post Total Impressions',
    'Lifetime Post Organic Impressions',
]
fb_1 = fb_1[sheet1_keep]

In [4]:
# Remove the row labelled 0 (the unnecessary first row, as formatted by FB),
# then renumber the rows. reset_index() keeps the old row labels in a new
# 'index' column.

fb_1_without_first_row = fb_1.drop(labels=[0], axis='index')
fb_1 = fb_1_without_first_row.reset_index()

In [5]:
# Remove the extra 'index' column so only the selected data columns remain.

fb_1 = fb_1.drop('index', axis='columns')

In [6]:
# Relabel the columns, by position, with the headers used in the Google Sheet.

sheet1_headers = [
    'Post ID',
    'Permalink',
    'Post Message',
    'Type',
    'Post Date',
    'Total Reach (Unique Users)',
    'Organic reach',
    'Total Impressions (Non-Unique)',
    'Organic Impressions',
]
fb_1.columns = sheet1_headers

# Extracting from Sheet 3
- Like
- Comment
- Share

In [7]:
# Load the third worksheet (sheet index 2) of the same workbook.

fb_3_workbook = '/Users/jairusmartinez/Desktop/fb_insights_data.xlsx'
fb_3 = pd.read_excel(fb_3_workbook, sheet_name=2)

In [8]:
# Keep the join key ('Post ID') plus the three columns needed from this sheet.

sheet3_keep = ['Post ID', 'like', 'share', 'comment']
fb_3 = fb_3[sheet3_keep]

In [9]:
# Relabel the columns, by position: like -> Likes, share -> Shares,
# comment -> Comments.

sheet3_headers = ['Post ID', 'Likes', 'Shares', 'Comments']
fb_3.columns = sheet3_headers

# Extracting from Sheet 5
- Photo View
- Link Clicks
- Other Clicks

In [10]:
# Load the fifth worksheet (sheet index 4) of the same workbook.

fb_5_workbook = '/Users/jairusmartinez/Desktop/fb_insights_data.xlsx'
fb_5 = pd.read_excel(fb_5_workbook, sheet_name=4)

In [11]:
# Keep the join key ('Post ID') plus the three click columns from this sheet.

sheet5_keep = ['Post ID', 'other clicks', 'photo view', 'link clicks']
fb_5 = fb_5[sheet5_keep]

In [12]:
# Relabel the columns, by position, with the headers used in the Google Sheet.

sheet5_headers = ['Post ID', 'Other Clicks', 'Photo Views', 'Link Clicks']
fb_5.columns = sheet5_headers

# Join the DataFrames
1. join fb_1 with fb_3 (outer join on Post ID)
2. join the result with fb_5 (inner join on Post ID)

In [13]:
# Outer-join fb_1 with fb_3 on 'Post ID': posts present in either frame are kept.

merge13 = pd.merge(
    fb_1,
    fb_3,
    left_on='Post ID',
    right_on='Post ID',
    how='outer',
)

In [14]:
# Inner-join the previous result with fb_5 on 'Post ID': only posts present in
# both frames are kept.

df = pd.merge(merge13, fb_5, on='Post ID', how='inner')

# Clean all NaN Values and replace with 0

In [15]:
# Replace missing values with 0, one column at a time.
# NOTE(review): this runs over every column of df, so missing text and date
# cells are filled with 0 as well as the numeric ones.
for column_name in df.columns:
    label = str(column_name)
    df[label] = df[label].fillna(0)

# Set Post Date to datetime64

In [16]:
# Convert 'Post Date' to datetime64 and store the result back on the frame.
# astype() returns a new Series, so without an assignment the conversion was
# only displayed, never kept. pd.to_datetime also avoids the unit-less
# 'datetime64' dtype string, which pandas 2.x rejects.

df['Post Date'] = pd.to_datetime(df['Post Date'])

# Display the converted column to confirm its dtype.
df['Post Date']

0    2022-12-01 17:31:56
1    2022-11-29 16:00:18
2    2022-11-24 09:00:56
3    2022-11-18 15:00:16
4    2022-11-17 16:00:13
5    2022-11-14 12:00:18
6    2022-11-11 17:30:11
7    2022-11-11 11:45:12
8    2022-11-10 17:00:16
9    2022-11-02 15:00:25
10   2022-10-28 16:00:05
11   2022-10-27 17:00:24
12   2022-10-26 17:00:14
13   2022-10-25 17:00:18
14   2022-10-21 15:00:19
15   2022-10-10 14:37:10
16   2022-10-07 15:00:43
17   2022-10-05 16:30:11
18   2022-10-05 11:45:13
Name: Post Date, dtype: datetime64[ns]

In [17]:
# extract time and create a new column
# (kept for reference only: 'Post_Time' is created by df.assign() in a later cell)
#df['Post Time'] = df['Post Date'].dt.strftime('%I:%M %p')

In [18]:
# Derive the posting time and the engagement metrics.
# - Post_Time: time of day of 'Post Date', formatted like '05:31 PM'.
# - Engagement: Likes + Shares + Comments.
# - Engagement_clicks: Other Clicks + Photo Views + Link Clicks.
# - Total_Engagement: Engagement + Engagement_clicks.
# - Engagement_rate: Total_Engagement as a percentage of total reach,
#   rounded to 1 decimal place.
# Every value is a callable that reads from `x`, the frame being built, so no
# expression reaches back to the outer `df`.
df = df.assign(
    Post_Time=lambda x: x['Post Date'].dt.strftime('%I:%M %p'),
    Engagement=lambda x: x['Likes'] + x['Shares'] + x['Comments'],
    Engagement_clicks=lambda x: x['Other Clicks'] + x['Photo Views'] + x['Link Clicks'],
    Total_Engagement=lambda x: x['Engagement'] + x['Engagement_clicks'],
    Engagement_rate=lambda x: (
        x['Total_Engagement']
        # A reach of 0 is masked to NaN before dividing, so the division can
        # never produce inf or fail; those rows get a rate of 0 below.
        .div(x['Total Reach (Unique Users)'].where(x['Total Reach (Unique Users)'] != 0))
        .mul(100)
        .round(1)
        .fillna(0)
    ),
)

In [19]:
# Display the frame's column labels.
df.columns

Index(['Post ID', 'Permalink', 'Post Message', 'Type', 'Post Date',
       'Total Reach (Unique Users)', 'Organic reach',
       'Total Impressions (Non-Unique)', 'Organic Impressions', 'Likes',
       'Shares', 'Comments', 'Other Clicks', 'Photo Views', 'Link Clicks',
       'Post_Time', 'Engagement', 'Engagement_clicks', 'Total_Engagement',
       'Engagement_rate'],
      dtype='object')

In [20]:
# Put the columns in the order used by the Google Sheet.
google_sheet_order = [
    'Post ID',
    'Permalink',
    'Post Message',
    'Type',
    'Post Date',
    'Post_Time',
    'Engagement_rate',
    'Total Reach (Unique Users)',
    'Organic reach',
    'Total Impressions (Non-Unique)',
    'Organic Impressions',
    'Likes',
    'Shares',
    'Comments',
    'Engagement',
    'Other Clicks',
    'Photo Views',
    'Link Clicks',
    'Engagement_clicks',
    'Total_Engagement',
]
df = df[google_sheet_order]

In [21]:
# Rename the derived columns to their final headers.
# The key for the rate column was misspelled ('Engaement_rate'); rename()
# skips labels it cannot find, so 'Engagement_rate' silently kept its old name.
final_headers = {
    'Post_Time': 'Post Time',
    'Engagement_rate': 'Engagement Rate',
    'Engagement': 'Engagement (Reactions, Shares, Comments, etc.)',
    'Engagement_clicks': 'Engagement (Clicks)',
    'Total_Engagement': 'Total Engagement',
}
df = df.rename(columns=final_headers)

# Export df

In [23]:
# Write the frame to an Excel file in the working directory, without the
# row index.
export_path = 'fb_toCopy.xlsx'
df.to_excel(export_path, index=False)