### <p align="center"> **API - DATA QUERIES** </p>

In [1]:
#We import the libraries that we will use
from fastapi import FastAPI
import pandas as pd
from typing import Optional

In [2]:
# Create the FastAPI application instance; endpoints are registered on it with @app.get(...)
app = FastAPI()

##### _You can install it in your Deta cloud here: https://deta.space/discovery/@gvbrilohenry/deta_

##### _After that, run it with this link: https://deta-1-a5422253.deta.app/docs#_

In [3]:
# Load the dataset from "df_total_data.csv" (path is relative to the working directory)
# into the module-level DataFrame `df`
df = pd.read_csv("df_total_data.csv")

In [4]:
# Display the loaded DataFrame to inspect its columns and a sample of its rows
df

Unnamed: 0,movieId,score_prom,type,title,director,cast,country,date_added,release_year,rating,listed_in,description,duration_int,duration_type,platform
0,as1,3.47,movie,the grand seduction,don mckellar,"brendan gleeson, taylor kitsch, gordon pinsent",canada,2021-03-30,2014,g,"comedy, drama",a small fishing village must procure a local d...,113.0,min,amazon
1,as10,3.44,movie,david's mother,robert allan ackerman,"kirstie alley, sam waterston, stockard channing",united states,2021-04-01,1994,g,drama,sally goodson is a devoted mother to her autis...,92.0,min,amazon
2,as100,3.61,movie,wilder napalm,glenn gordon caron,"debra winger, dennis quaid, arliss howard, m. ...",,,1993,pg-13,"comedy, science fiction",two brothers with the secret power of starting...,109.0,min,amazon
3,as1000,3.56,movie,sinbad: make me wanna holla,jay chapman,sinbad,,,2014,16+,"arts, entertainment, and culture, comedy, docu...",watch the all-out stand-up special featuring a...,90.0,min,amazon
4,as1001,3.59,movie,simple gifts: the chamber music society at sha...,habib azar,,,,2016,g,documentary,a,84.0,min,amazon
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22993,ns995,3.52,movie,this lady called life,kayode kasum,"bisola aiyeola, efa iwara, molawa onajobi, tin...",nigeria,2021-04-23,2020,tv-14,"dramas, international movies, romantic movies","abandoned by her family, young single mother a...",120.0,min,netflix
22994,ns996,3.63,movie,vizontele,"yılmaz erdoğan, ömer faruk sorak","yılmaz erdoğan, demet akbağ, altan erkekli, ce...",turkey,2021-04-23,2001,tv-ma,"comedies, dramas, international movies","in 1974, a rural town in anatolia gets its fir...",106.0,min,netflix
22995,ns997,3.53,movie,homunculus,takashi shimizu,"go ayano, ryo narita, yukino kishii, anna ishi...",japan,2021-04-22,2021,tv-ma,"horror movies, international movies, thrillers",truth and illusion blurs when a homeless amnes...,116.0,min,netflix
22996,ns998,3.58,tv show,life in color with david attenborough,,david attenborough,"australia, united kingdom",2021-04-22,2021,tv-pg,"british tv shows, docuseries, international tv...","using innovative technology, this docuseries e...",1.0,season,netflix


In [5]:
@app.get("/")
def read_root():
    """Root endpoint: answer with a fixed welcome payload."""
    greeting = {"Hello!": "Welcome!"}
    return greeting

#### _Setpoint 1: Movie with the longest duration, with optional filters for year, platform and duration type (the function should be called get_max_duration(year, platform, duration_type))_

In [6]:
@app.get("/get_max_duration")
def get_max_duration(year: Optional[int] = None, platform: Optional[str] = None, duration_type: Optional[str] = 'min'):
    """Return the title with the longest duration, optionally filtered.

    Parameters
    ----------
    year : int, optional
        Keep only rows whose ``release_year`` equals this value.
    platform : str, optional
        One of amazon, disney, hulu, netflix (case-insensitive). An empty or
        missing value disables the platform filter.
    duration_type : str, optional
        'min' or 'season' (case-insensitive). Defaults to 'min'; ``None``
        disables the duration-type filter.

    Returns
    -------
    dict
        ``{"max_duration_movie": <title>}`` on success, or
        ``{"Error": <message>}`` when an argument is invalid or nothing matches.
    """
    # Normalise the case BEFORE validating, so 'MIN' or 'Season' are accepted
    # (previously the check ran on the raw value and rejected them).
    if duration_type is not None:
        duration_type = duration_type.lower()
        if duration_type not in ('min', 'season'):
            return {"Error": "Duration must be one of the following: min, season"}

    candidates = df

    # `is not None` rather than truthiness, so a literal year of 0 is not silently ignored
    if year is not None:
        candidates = candidates[candidates.release_year == year]

    if platform:
        # Lower-case the platform in case the user wrote it in upper case
        platform = platform.lower()

        if platform not in ("amazon", "disney", "hulu", "netflix"):
            return {"Error": "Wrong platform! You must enter one of the following: amazon, disney, hulu, netflix"}

        candidates = candidates[candidates.platform == platform]

    if duration_type is not None:
        candidates = candidates[candidates.duration_type == duration_type]

    # A row without a duration can never be "the longest"; dropping such rows makes
    # an all-missing selection report "not found" instead of returning an arbitrary title.
    candidates = candidates.dropna(subset=['duration_int'])

    if candidates.empty:
        return {"Error": "No movie was found with the given parameters."}

    longest = candidates.sort_values('duration_int', ascending=False).iloc[0]

    return {"max_duration_movie": longest['title']}


In [7]:
# Test Setpoint 1: call the endpoint function directly (no HTTP request involved)
get_max_duration(2012, 'amazon','min')

{'max_duration_movie': 'jab tak hai jaan'}

#### _Setpoint 2: Number of films by platform with a score greater than XX in a given year (the function should be called get_score_count(platform, scored, year))_

In [8]:
@app.get("/get_score_count/{platform}/{scored}/{year}")
def get_score_count(platform: str, scored: float, year: int):
    """Count the titles on a platform, released in a given year, scoring above a threshold.

    Parameters
    ----------
    platform : str
        One of amazon, disney, hulu, netflix (case-insensitive).
    scored : float
        Score threshold; must lie in the closed range [0, 5]. Only rows whose
        ``score_prom`` is strictly greater than this value are counted.
    year : int
        Value matched against ``release_year``.

    Returns
    -------
    dict
        ``{"count": <int>}`` when at least one row matches, otherwise
        ``{"Error": <message>}`` (also used for invalid arguments).
    """
    # Lower-case the platform so the check and the filter are case-insensitive
    platform = platform.lower()

    if platform not in {"amazon", "disney", "hulu", "netflix"}:
        return {"Error": "Wrong platform! You must enter one of the following: amazon, disney, hulu, netflix"}

    # Written as a positive range test so NaN (for which every comparison is False)
    # is rejected as well, instead of slipping through two negative checks.
    if not (0 <= scored <= 5):
        return {"Error": "Score must be between 0 and 5."}

    # The requirement is "score greater than XX", hence the strict comparison
    matches = (
        (df.platform == platform)
        & (df.release_year == year)
        & (df.score_prom > scored)
    )
    count = int(matches.sum())

    if count == 0:
        return {"Error": "No movies were found with the given parameters."}

    return {"count": count}

In [9]:
# Test Setpoint 2: call the endpoint function directly (no HTTP request involved)
get_score_count('amazon', 3, 2015)

{'count': 378}

#### _Setpoint 3: Number of movies on a given platform (the function must be called get_count_platform(platform))_

In [10]:
@app.get('/get_count_platform/{platform}')
def get_count_platform(platform: Optional[str] = None):
    """Count the rows of the dataset that belong to a platform.

    Parameters
    ----------
    platform : str, optional
        One of disney, amazon, hulu, netflix (case-insensitive). When ``None``
        (only possible when the function is called directly, since the route
        declares ``platform`` as a path segment), rows with ``type == 'movie'``
        are counted across all platforms.

    Returns
    -------
    dict
        ``{"count": <int>}`` or ``{"Error": <message>}``.
    """
    if platform is None:
        # No platform requested: total number of movie rows over every platform.
        # (Previously this branch returned the number of distinct platforms.)
        selected = df[df.type == 'movie']
    else:
        # Normalise once and reuse for both validation and filtering
        wanted = platform.lower()

        if wanted not in ('disney', 'amazon', 'hulu', 'netflix'):
            return {"Error": "The platform must be one of the valid options: Disney Plus, Amazon Prime, Hulu or Netflix."}

        # NOTE(review): this branch counts every title on the platform, not only
        # rows with type == 'movie' — confirm whether that is intended.
        selected = df[df.platform.str.lower() == wanted]

    # Make sure at least one row satisfies the filters
    if selected.empty:
        return {"Error": "There are no movies for that platform."}

    # Counting rows directly avoids a dictionary lookup keyed on the stored casing
    return {"count": int(len(selected))}

In [11]:
# Test Setpoint 3: call the endpoint function directly (no HTTP request involved)
get_count_platform('amazon')

{'count': 9668}

#### _Setpoint 4: Actor who appears most often for a given platform and year (the function should be called get_actor(platform, year))_

In [12]:
@app.get('/get_actor/{platform},{year}')
def get_actor(platform: str = None, year: int = None):
    """Return the actor who appears most often for a platform and release year.

    Parameters
    ----------
    platform : str, optional
        One of disney, amazon, hulu, netflix (case-insensitive). ``None``
        disables the platform filter.
    year : int, optional
        Release year between 1920 and 2021, both inclusive. ``None`` disables
        the year filter.

    Returns
    -------
    dict
        ``{'actor': <name>, 'count': <int>}`` on success, or
        ``{"Error": <message>}`` for invalid arguments or when no actor matches.
    """
    # Validate the platform on its lower-cased form and keep that form for filtering;
    # otherwise 'Amazon' would pass validation but match no rows.
    if platform is not None:
        platform = platform.lower()
        if platform not in ('disney', 'amazon', 'hulu', 'netflix'):
            return {"Error": "The platform must be one of the valid options: Disney Plus, Amazon Prime, Hulu or Netflix."}

    # Both bounds are valid years, as the error message states
    if year is not None and (year < 1920 or year > 2021):
        return {"Error": "The year must be within the range of 1920 to 2021."}

    # Apply only the filters that were actually supplied
    selection = df
    if platform is not None:
        selection = selection[selection.platform == platform]
    if year is not None:
        selection = selection[selection.release_year == year]

    # One entry per actor: split the comma-separated cast and explode the lists.
    # value_counts() ignores missing casts and sorts by frequency, highest first.
    actor_counts = selection['cast'].str.split(', ').explode().value_counts()

    # Nothing matched (or no matching row lists a cast): report it instead of
    # failing with an IndexError on the empty result.
    if actor_counts.empty:
        return {"Error": "No actor was found with the given parameters."}

    return {'actor': actor_counts.index[0], 'count': int(actor_counts.iloc[0])}

In [13]:
# Test Setpoint 4: call the endpoint function directly (no HTTP request involved)
get_actor('amazon', 2015)

{'actor': 'mother goose club', 'count': 3}

##### _You can download it here: https://deta.space/discovery/@gvbrilohenry/deta_

##### _To run the application interface locally: http://127.0.0.1:8000/docs#/_