# Families in Schools
IG Insights Script Goals:
- import data
- extract useful data
- format a new df
- export cleaned df to append to Google Sheets 

In [1]:
import pandas as pd
import numpy as np

# Import Data

In [2]:
# Load the raw insights export. The CSV has a banner line above the real
# header, so the true column names arrive as data row 0 (visible in the preview).
df = pd.read_csv(filepath_or_buffer='ig_insights_data.csv')
df.head(n=5)

Unnamed: 0,Content,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11
0,Caption,Post time,Content type,Reach,Likes and reactions,Sticker taps,Replies,Link clicks,Comments,Shares,Results,Cost per result
1,A Tale of Two (Internet) Cities: @Calfund case...,2022-10-25T17:00:57,Instagram post,92,10,,,,0,0,,
2,A Tale of Two (Internet) Cities: California Co...,2022-10-25T17:00:18,Facebook post,40,0,,,,0,0,,
3,Are you a dynamic leader who is passionate abo...,2022-10-21T15:00:41,Instagram post,102,8,,,,0,2,,
4,Are you a dynamic leader who is passionate abo...,2022-10-21T15:00:19,Facebook post,45,1,,,5,0,0,,


# Clean Up Columns

In [3]:
# Promote the first data row (which holds the real header labels) to be the column names.
df.columns = df.iloc[0].tolist()

In [4]:
# Remove the row labelled 0 (the promoted header row) and renumber the rows from 0.
# reset_index() keeps the old row labels in a new 'index' column.
df = df.drop(index=[0]).reset_index(drop=False)

In [5]:
# Discard the columns that are not needed, including the 'index' column.
df = df.drop(
    labels=['index', 'Sticker taps', 'Replies', 'Link clicks', 'Results', 'Cost per result'],
    axis='columns',
)

In [6]:
# Shorten two header labels: 'Post time' -> 'Post Date' and
# 'Likes and reactions' -> 'Likes'.
df = df.rename(
    columns={
        'Post time': 'Post Date',
        'Likes and reactions': 'Likes',
    }
)

# Set datetime64 dtype

In [7]:
# Preview the time of day parsed from Post Date. The result is only displayed,
# not stored, so df['Post Date'] itself is left unchanged by this cell.
# pd.to_datetime is used instead of astype('datetime64') because pandas 2.x
# rejects the unit-less 'datetime64' dtype. Missing dates show up as NaN.
pd.to_datetime(df['Post Date']).dt.strftime('%I:%M %p')

0     05:00 PM
1     05:00 PM
2     03:00 PM
3     03:00 PM
4     01:17 PM
5     11:59 AM
6     02:37 PM
7     02:37 PM
8     03:01 PM
9     03:00 PM
10    11:44 AM
11    04:30 PM
12    04:30 PM
13    11:45 AM
14    11:45 AM
15    11:45 AM
16    04:30 PM
17    04:30 PM
18    11:01 AM
19    12:45 PM
20    12:45 PM
21    06:00 PM
22    05:45 PM
23    12:00 PM
24    11:45 AM
25    12:22 PM
26    12:21 PM
27    04:56 PM
28    05:15 PM
29    05:00 PM
30    09:01 AM
31    09:01 AM
32         NaN
33         NaN
34    05:31 PM
35         NaN
36         NaN
37    05:28 PM
38    01:05 PM
39    01:05 PM
40    04:00 PM
41    04:00 PM
42    11:45 AM
43    11:45 AM
44    05:00 PM
45    05:00 PM
46    10:20 AM
47    10:20 AM
Name: Post Date, dtype: object

# Filter data 

In [8]:
# Keep only the Instagram posts.
# drop=True renumbers the rows without copying the old row labels into a stray
# 'index' column, which also keeps this cell safe to re-run.
df = df.loc[df['Content type'] == 'Instagram post'].reset_index(drop=True)

# Create New Calculated Columns and Format

In [9]:
# Build the calculated columns: post time of day, total engagement, and
# engagement rate.
# The metric columns arrive from the CSV as text (the real header row was part
# of the data), so convert them to numbers first; on text, `+` would
# concatenate the strings and `/` would raise TypeError. pd.to_numeric raises
# on non-numeric values instead of silently dropping them; blanks stay NaN.
df = df.assign(
    **{col: pd.to_numeric(df[col]) for col in ['Reach', 'Likes', 'Comments', 'Shares']}
).assign(
    # pd.to_datetime replaces astype('datetime64'), whose unit-less dtype is
    # rejected by pandas 2.x; rows with a missing date get NaN.
    time=lambda x: pd.to_datetime(x['Post Date']).dt.strftime('%I:%M %p'),
    # NaN in any of the three metrics propagates to the total.
    tot_eng=lambda x: x['Likes'] + x['Comments'] + x['Shares'],
    # Engagement as a percentage of reach, rounded to one decimal place.
    eng_rate=lambda x: (x['tot_eng'] / x['Reach'] * 100).astype('float64').round(1),
)

In [10]:
# Relabel the calculated columns with the headers used in the Google Sheet.
df = df.rename(
    columns={
        'time': 'Post Time',
        'tot_eng': 'Total Engagement',
        'eng_rate': 'Engagement Rate',
    }
)

In [11]:
# Keep only the columns to export, in this order.
df = df[[
    'Caption', 'Post Date', 'Post Time', 'Content type', 'Reach',
    'Likes', 'Comments', 'Shares', 'Total Engagement', 'Engagement Rate',
]]

# Export Data

In [12]:
# Write the cleaned table to an Excel workbook, leaving out the row index.
df.to_excel(excel_writer='ig_toCopy.xlsx', index=False)