# Guide to Using APIs with Python

## Import the required libraries

**MUST DO THIS FIRST**

In [1]:
import pandas as pd
import pprint, requests, json
from pandas.io.json import json_normalize

## RapidAPI
The following APIs are from the RapidAPI website, so they all share the same basic request format, which is shown in the example code cells below. Some sources have multiple API options. To get an API key, sign up for a free RapidAPI account and a key will be generated for you.

### Instagram
*Option 1*: url = "https://instagramdimashirokovv1.p.rapidapi.com/user/{username}"  
*Option 2*: url = "https://instagram29.p.rapidapi.com/user/{username}"

### Reddit
In addition to the **headers** argument, the Reddit request requires a **params** argument.

*For posts*: url = "https://socialgrep.p.rapidapi.com/search/posts"  
*For comments*: url = "https://socialgrep.p.rapidapi.com/search/comments"

### Hoaxy


### Google Trends

## SerpApi
These APIs are from SerpApi, which uses its own request format that I've adapted.

### Google Scholar Search

### YouTube


#### Turning JSON data into a dataframe

Two easy ways to do this:  
`pd.json_normalize` - this is good for nested dictionaries, like the Reddit one below.  
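`pd.read_json` - this is good for flat JSON, like the Instagram one. Note that it reads raw JSON text or a file, not a dictionary that has already been parsed with `json.loads`.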

## Instagram Example

In [None]:
# Option 1
# NOTE: the URL still contains the literal {username} placeholder and the key is
# the literal "api-key"; put real values in before running this cell.
url = 'https://instagramdimashirokovv1.p.rapidapi.com/user/{username}'

# Request headers: the API key and the host name of the API being called.
headers = {
    "x-rapidapi-key": 'api-key',
    "x-rapidapi-host": 'InstagramdimashirokovV1.p.rapidapi.com',
}

response = requests.get(url, headers=headers)

# Show the raw body of the response.
print(response.text)

# Option 2 (uncomment these lines to use the alternative API instead)
#url = "https://instagram29.p.rapidapi.com/user/{username}"

#headers = {
#    'x-rapidapi-key': "api-key",
#    'x-rapidapi-host': "instagram29.p.rapidapi.com"
#    }

In [None]:
# Parse the JSON text of the response into a Python object, then list its
# top-level keys to see what variables are there.
my_insta = json.loads(response.text)
my_insta.keys()

In [None]:
# Turn the data into a dataframe.
# `my_insta` has already been parsed by json.loads, and pd.read_json only accepts
# JSON text, a path, or a file-like object, so it cannot take the parsed object.
# pd.json_normalize builds the frame directly from the parsed data instead.
insta_df = pd.json_normalize(my_insta)

# Summarise the columns, their dtypes, and the non-null counts.
insta_df.info()

## Reddit Example

In [31]:
# Option 1
url = "https://socialgrep.p.rapidapi.com/search/posts"

# Sent as URL parameters; `query` holds the search term.
querystring = {"query":"conspiracy"}

# Replace "api-key" with your own RapidAPI key before running.
headers = {
    'x-rapidapi-key': "api-key",
    'x-rapidapi-host': "socialgrep.p.rapidapi.com"
    }

response = requests.request("GET", url, headers=headers, params=querystring)


# Option 2
#url = "https://reddit3.p.rapidapi.com/subreddit"

#querystring = {"url":"https://www.reddit.com/r/funny"}

#headers = {
#    'x-rapidapi-key': "api-key",
#    'x-rapidapi-host': "reddit3.p.rapidapi.com"
#    }

# Parse the JSON text of the response into a Python object.
reddit = json.loads(response.text)

# Only the last expression of a cell is echoed automatically, so the keys are
# printed explicitly; otherwise they would never be shown from the middle of the cell.
print(reddit.keys())

# Replace `record_path` with whatever variable you see in the printed keys above
reddit_df = pd.json_normalize(reddit, record_path = ['data'])


In [None]:
# pprint.pprint() works better than the regular print() for printing out
# nested structures like this one.
pprint.pprint(reddit)

This cell collects every post title into a single list so the text can be used for text analytics.

In [None]:
# Gather every value of the `title` column into a plain Python list.
text = [title for title in reddit_df['title']]

print(text)

In [None]:
# Export the dataframe to a csv file named reddit_data.csv
# (a relative path, so it is resolved against the current working directory).
reddit_df.to_csv('reddit_data.csv')

## Open Weather Map Example

In [2]:
# For current weather data
url = 'https://community-open-weather-map.p.rapidapi.com/weather'

# URL parameters: `q` is the place to look up and `units` selects the unit system.
querystring = {
    "q": 'buffalo,us',
    "units": 'imperial',
}

# Replace "api-key" with your own RapidAPI key before running.
headers = {
    "x-rapidapi-key": 'api-key',
    "x-rapidapi-host": 'community-open-weather-map.p.rapidapi.com',
}

response = requests.get(url, headers=headers, params=querystring)

# Keep both views of the data: the parsed object and a flattened dataframe.
weather_dict = json.loads(response.text)
weather_df = pd.json_normalize(weather_dict)

In [None]:
# Getting a glimpse of the data with head() and the column types with the dtypes attribute.
# Only the last expression of a cell is shown automatically, so head() is printed
# explicitly; left bare in the middle of the cell, its result would be discarded.
print(weather_df.head())
weather_df.dtypes

In [None]:
# Printing out what the current weather is like
# `weather` is indexed like a list below; its first entry supplies the description.
weather = weather_dict['weather']
print(f"The current weather condition in {weather_dict['name']} is {weather[0]['description']}.")
# Spelling of "Fahrenheit" corrected in the message shown to the reader.
print(f"The current temperature is {weather_dict['main']['temp']} degrees Fahrenheit with {weather_dict['main']['humidity']} percent humidity.")
print(f"But it feels like it is {weather_dict['main']['feels_like']} degrees....")

In [None]:
# To convert the unix timestamp into human readable format
import time, datetime

# datetime.fromtimestamp() needs a single number, but weather_df['sys.sunrise'] is a
# whole pandas Series. Pull out the first value and make it a plain int first.
# fromtimestamp() reports the time in this machine's local timezone.
print(datetime.datetime.fromtimestamp(int(weather_df['sys.sunrise'].iloc[0])))
print(datetime.datetime.fromtimestamp(int(weather_df['sys.sunset'].iloc[0])))

In [None]:
# Narrow the frame down to the place name plus the temperature and humidity fields.
columns_of_interest = [
    'name',
    'main.temp',
    'main.temp_min',
    'main.temp_max',
    'main.feels_like',
    'main.humidity',
]
weather_df.loc[:, columns_of_interest]

## Hoaxy Example

In [None]:
url = 'https://api-hoaxy.p.rapidapi.com/articles'

# NOTE(review): the query mixes OR and AND without parentheses, so it may not be
# grouped as (Trump OR Biden) AND <date range> - confirm the intended grouping.
querystring = {
    "query": 'Trump OR Biden AND date_published:[2020-03-30 TO 2020-12-31]',
    "use_lucene_syntax": 'true',
    "sort_by": 'relevant',
}

# Replace "api-key" with your own RapidAPI key before running.
headers = {
    "x-rapidapi-key": 'api-key',
    "x-rapidapi-host": 'api-hoaxy.p.rapidapi.com',
}

response = requests.get(url, headers=headers, params=querystring)

# Parse the body, then flatten the records stored under the `articles` key.
hoaxy = json.loads(response.text)
hoax_df = pd.json_normalize(hoaxy, record_path=['articles'])

# Show the top-level keys of the response and the first rows of the frame.
print(hoaxy.keys())
print(hoax_df.head())


## Google Trends Example (Google News API)

In [None]:
url = 'https://google-news.p.rapidapi.com/v1/search'

# URL parameters: search term, country, and language.
querystring = {
    "q": 'news',
    "country": 'US',
    "lang": 'en',
}

# Replace "api-key" with your own RapidAPI key before running.
headers = {
    "x-rapidapi-key": 'api-key',
    "x-rapidapi-host": 'google-news.p.rapidapi.com',
}

response = requests.get(url, headers=headers, params=querystring)

# Parse the body, then flatten the records stored under the `articles` key.
searchResults = json.loads(response.text)
df = pd.json_normalize(searchResults, record_path=['articles'])

# Preview the first rows.
df.head()

In [None]:
# Pretty-print the full parsed response to inspect its nested structure.
pprint.pprint(searchResults)

In [28]:
# Rebuild the frame from the parsed response so this cell gives the same result
# every time it is re-run, even though `df` is reassigned below.
df = pd.json_normalize(searchResults, record_path = ['articles'])

# Convert `published` to datetimes, then sort with the newest articles first.
df['published'] = pd.to_datetime(df['published'])
df = df.sort_values(by='published', ascending=False)

# Write the file first: only the last expression of a cell is displayed, so the
# preview has to come last or it would be silently discarded.
df.to_csv('google_news_data.csv')
df.head(10)


## YouTube Example

In [2]:
# Base endpoint only. The engine is chosen through `params` below, so it is not
# repeated in the URL (otherwise `engine=youtube` would be sent twice).
url = 'https://serpapi.com/search.json'

# Replace "api-key" with your own SerpApi key before running.
params = {
  "engine": "youtube",
  "search_query": "cats",
  "api_key": "api-key"
}

# Passed by keyword to make it explicit that these are URL query parameters.
results = requests.get(url, params=params)

In [4]:
# Parse the JSON text of the response into a Python object.
videoresults = json.loads(results.text)