-
Notifications
You must be signed in to change notification settings - Fork 0
/
callGraphApi.py
128 lines (97 loc) · 4.59 KB
/
callGraphApi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import json
import time
from datetime import datetime
from datetime import date, timedelta
from pprint import pprint

import pandas as pd
import requests
# Widen console output so full DataFrames print without truncation.
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1500)

# Wall-clock start, used by the progress prints further down.
startTime = datetime.now()

from sqlalchemy import create_engine, MetaData, Table
# For loading the response data into a PostgreSQL DB, please insert the
# necessary credentials into the variable `engine` and remove the hashtag #.
#engine = create_engine('postgresql+psycopg2://USERNAME:PASSWORD@HOSTNAME:PORTNUMBER/DBNAME')
# DUE TO EXTENT OF DATA .csv and .xlsx EXPORTS ARE NOT RECOMMENDED

# --- Facebook Graph API request configuration -----------------------------
page_name = 'PAGE NAME'            # ADD PAGE NAME
url_page_id = 'PAGE ID'            # ADD PAGE ID
url_access_token = "ACCESS TOKEN"  # ADD YOUR GRAPH API ACCESS TOKEN

# URL fragments assembled by urls_list() below.
url_base = 'https://graph.facebook.com/v3.2/'
url_insights_access_token = "/insights?access_token="
url_dk = "pretty=1"
url_metric = "metric="
url_since = "since="
url_until = "until="

# Date range to extract.  Be careful with long date ranges; it is
# recommended to load 1-month intervals (risk of memory overload).
day_from = date(2020, 12, 31)
day_to = date(2021, 2, 1)
delta = day_to - day_from
# One ISO-format day string per day in [day_from, day_to], inclusive.
base_dates = [str(day_from + timedelta(i)) for i in range(delta.days + 1)]
# Metrics (insight variables) extracted by this run.
group2 = [
    "page_fans_city",
    "page_fans_country",
    "page_impressions_by_country_unique",
    "page_content_activity_by_country_unique",
    "page_impressions_by_city_unique",
    "page_content_activity_by_city_unique",
]
# Full catalogue of metrics that may be extracted instead:
#   page_content_activity_by_action_type, page_content_activity_by_action_type_unique,
#   page_content_activity_by_age_gender_unique, page_content_activity_by_city_unique,
#   page_content_activity_by_country_unique, page_content_activity_by_locale_unique,
#   page_fans_by_like_source_unique, page_fans_by_unlike_source_unique, page_fans_city,
#   page_fans_country, page_fans_gender_age, page_fans_locale, page_fans_online,
#   page_impressions_by_age_gender_unique, page_impressions_by_city_unique,
#   page_impressions_by_country_unique, page_impressions_by_locale_unique,
#   page_impressions_by_story_type_unique, page_impressions_frequency_distribution,
#   page_negative_feedback_by_type, page_negative_feedback_by_type_unique,
#   page_positive_feedback_by_type, page_positive_feedback_by_type_unique,
#   page_views_by_age_gender_logged_in_unique,
#   page_views_by_internal_referer_logged_in_unique, page_views_external_referrals
# Accumulator for every request URL built by urls_list() across all metrics.
# (The original pre-loop that assigned `metric` here was dead code: `metric`
# is reassigned in the download loop further down before urls_list() is
# called, and a module-level `global` statement is a no-op, so both were
# removed.)
list_of_urls = []
def urls_list(dates, url_list_n):
    """Append one Graph API insights URL per rolling 2-day window of *dates*.

    Windows are (dates[i], dates[i + 2]) for i in 0 .. len(dates) - 3, so
    len(dates) - 2 URLs are appended in total.

    Reads the module-level URL fragments (url_base, url_page_id, ...) and
    the currently selected `metric` as globals; mutates url_list_n in place
    and returns None.

    BUG FIX: the original while-loop bound used the global `base_dates`
    instead of the `dates` parameter, so the function only worked when
    called with base_dates itself; len(dates) is used now.
    """
    print("working on urls " + str(datetime.now() - startTime))
    for i in range(len(dates) - 2):
        url = (url_base + url_page_id + url_insights_access_token
               + url_access_token + "&" + url_dk
               + "&" + url_metric + metric
               + "&" + url_since + dates[i]
               + "&" + url_until + dates[i + 2])
        url_list_n.append(url)
# --- Download, parse and export loop ---------------------------------------
# Build the full URL list: one batch of 2-day-window URLs per metric.
# NOTE: urls_list() reads `metric` as a module-level global.
for metric in group2:
    urls_list(base_dates, list_of_urls)

# One small DataFrame per successfully parsed response.
dataframes_list = []

for request_url in list_of_urls:
    # Defined up front so the error message in the except-branch cannot hit
    # a NameError when the very first request fails (bug in the original).
    var_id = None
    end_time = None
    try:
        req = requests.get(request_url)
        js = req.json()
        # Expected response shape (assumed from the original indexing —
        # TODO confirm against the Graph API docs):
        #   js['data'][0]['values'][0] -> {'value': {key: count}, 'end_time': ...}
        breakdown = js['data'][0]['values'][0]['value']
        end_time = js['data'][0]['values'][0]['end_time']
        var_id = js['data'][0]['name']
        # One row per breakdown key (city/country), tagged with metric,
        # date and page so every metric lands in the same table.
        df = pd.DataFrame.from_dict(breakdown, orient='index')
        df.reset_index(level=0, inplace=True)
        df1 = pd.DataFrame()
        df1['country_id'] = df['index']
        df1['value'] = df[0]
        df1.loc[:, 'var_id'] = var_id
        # Local renamed from `date` — the original shadowed datetime.date.
        df1.loc[:, 'date'] = end_time
        df1.loc[:, 'page_id'] = url_page_id
        df1.loc[:, 'page_name'] = page_name
        dataframes_list.append(df1)
        print("working on " + str(var_id) + " day " + str(end_time) + " "
              + str(datetime.now() - startTime))
    except (IndexError, KeyError):
        # Response is missing the expected keys (e.g. rate limit or empty
        # day); report it and back off briefly before the next request.
        print('Error var_id: ' + str(var_id) + ' at date: ' + str(end_time))
        time.sleep(1)

# Concatenate ONCE after the loop; the original re-concatenated the whole
# accumulator on every iteration, which is quadratic in the response count.
try:
    data = pd.concat(dataframes_list)
    data.reset_index(level=0, inplace=True)
    # EXPORT TO DB — requires `engine` (top of file) to be uncommented.
    data.to_sql('fb_data_in', engine, if_exists='append')
except ValueError:
    # pd.concat raises ValueError on an empty list: nothing was downloaded.
    print('No data downloaded; skipping DB export.')
except NameError:
    # `engine` is still commented out near the top of the file.
    print('No SQLAlchemy engine configured; skipping DB export.')

print(" DONE IN " + str(datetime.now() - startTime))