## API TMDb 

#### A.  Search for movies in the “Comedy” genre released in the year 2000 or later. Retrieve the 300 most popular movies in this genre. The movies should be sorted from most popular to least popular.

In [146]:
%run ./tmdb_keys.ipynb

In [147]:
import requests
import time
import pandas as pd

In [148]:
def getData(pageNo):
    url = "https://api.themoviedb.org/3/discover/movie?api_key="+api_key+\
    "&with_genres=Comedy"+\
    "&primary_release_date.gte=2000-01-01"+\
    "&sort_by=popularity.desc"+\
    "&page="+str(pageNo)
    itr_cnt = 50
    while(itr_cnt):
        r=requests.get(url)
        if(r.status_code == 200):
            break
        time.sleep(1)
        itr_cnt = itr_cnt -1
    movie_data = r.json()
    movies=[]
    for mv in movie_data['results']:
        data = []
        data.append(mv['id'])
        data.append(mv['original_title'])
        movies.append(data)
    return movies

In [149]:
result=[]
no_pages = 15 # 20 * 15 = 300
for i in range(1,no_pages+1):
    result += getData(i)
result[:5]

[[566525, 'Shang-Chi and the Legend of the Ten Rings'],
 [512195, 'Red Notice'],
 [580489, 'Venom: Let There Be Carnage'],
 [585245, 'Clifford the Big Red Dog'],
 [370172, 'No Time to Die']]

In [150]:
df1 = pd.DataFrame(result)
df1.to_csv("movie_ID_name.csv", index=False, header=None)

####  B. For each of the 300 movies, use the API to find its 5 similar movies. If a movie has fewer than 5 similar movies, the API will return as many as it can find. Your code should be flexible to work with however many movies the API returns. 

In [151]:
def simMovie(mov_id):
    url = "https://api.themoviedb.org/3/movie/"+str(mov_id)+\
    "/similar?api_key="+api_key+\
    "&language=en-US"+\
    "&page=1"
    itr_cnt = 50
    while(itr_cnt):
        r_main=requests.get(url)
        if(r_main.status_code == 200):
            break
        time.sleep(1)
        itr_cnt = itr_cnt -1
    sim_data = r_main.json()
    sim_mov = []
    if(len(sim_data['results']) > 5):
        sim_data = sim_data['results'][:5]
    else:
        sim_data = sim_data['results']
    sim_mov = [x['id'] for x in sim_data]
    return sim_mov

In [152]:
result2=[]
for main_mov_id in df1.loc[:,0]:
    sim_mv = simMovie(main_mov_id)
    for sub_mv_id in sim_mv:
        if(sub_mv_id < main_mov_id):
            result2.append([sub_mv_id,main_mov_id])
        else:
            result2.append([main_mov_id,sub_mv_id])
result2[:5]

[[62764, 566525],
 [59859, 566525],
 [60304, 566525],
 [59440, 566525],
 [64328, 566525]]

In [153]:
df2 = pd.DataFrame(result2)
df2.drop_duplicates(keep='first',inplace=True)
df2.to_csv("movie_ID_sim_movie_ID.csv", index=False, header=None)