# Requests & BeautifulSoup
The requests library makes a GET request to a web server and downloads the HTML contents of a given web page.

The Beautiful Soup library is used to parse an HTML document.

In [33]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

In [None]:
# Site root; page links scraped from the site are appended to this.
base_url = "https://movies.fandom.com"
# Category page that lists the available Disney transcripts.
url = f"{base_url}/wiki/Category:Disney_Transcripts"

In [81]:
def get_df_available_movies():
    """Scrape the transcripts category page into a DataFrame.

    Downloads the page at the module-level ``url``, collects every anchor
    with the ``category-page__member-link`` class, and records one row per
    anchor.

    Returns:
        pandas.DataFrame: columns ``link`` (the anchor's ``href``) and
        ``title`` (the anchor's ``title`` attribute up to the first ``/``).
        Both columns are present even when no anchors are found.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    page = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of parsing an error page into an
    # empty table.
    page.raise_for_status()

    soup = BeautifulSoup(page.content, "html.parser")
    anchors = soup.find_all("a", class_="category-page__member-link")

    # Build all rows first: DataFrame.append was removed in pandas 2.0 and
    # copied the whole frame on every call.
    rows = [
        {"link": anchor["href"], "title": anchor["title"].split("/")[0]}
        for anchor in anchors
    ]
    return pd.DataFrame(rows, columns=["link", "title"])

def get_movie_transcript(movies_df, title_sel):
    """Download the transcript page text for one movie.

    Args:
        movies_df: DataFrame with ``title`` and ``link`` columns, as built
            by ``get_df_available_movies``.
        title_sel: Exact title to look up in ``movies_df.title``.

    Returns:
        str: Text of the first ``div`` with class ``mw-parser-output`` on
        the page at ``base_url + link``.

    Raises:
        ValueError: if no row of ``movies_df`` has the title ``title_sel``.
        IndexError: if the page has no ``mw-parser-output`` div.
    """
    sel = movies_df.loc[movies_df.title == title_sel]
    if sel.empty:
        # Raise a real exception type: raising a bare string is itself a
        # TypeError and hides this message from the caller.
        raise ValueError(
            f"Title {title_sel} not found. "
            "Are you sure you selected an existing title?"
        )

    # If several rows share the title, the first one is used.
    link = sel.link.values[0]
    print(link)

    linkpage = requests.get(base_url + link, timeout=30)
    linksoup = BeautifulSoup(linkpage.content, "html.parser")
    return linksoup.find_all("div", class_="mw-parser-output")[0].get_text()

In [68]:
# Scrape the category page once and preview the first rows of the result.
movies_df = get_df_available_movies()
movies_df.head()

Unnamed: 0,link,title
0,/wiki/101_Dalmatians_(1996)/Transcript,101 Dalmatians (1996)
1,/wiki/102_Dalmatians/Transcript,102 Dalmatians
2,/wiki/A_Bug%27s_Life/Transcript,A Bug's Life
3,/wiki/A_Goofy_Movie/Transcript,A Goofy Movie
4,/wiki/Aladdin_(1992)/Transcript,Aladdin (1992)
...,...,...
96,/wiki/Treasure_Planet/Transcript,Treasure Planet
97,/wiki/Up_(2009)/Transcript,Up (2009)
98,/wiki/Valiant/Transcript,Valiant
99,/wiki/WALL-E/Transcript,WALL-E


In [69]:
# List every scraped title; get_movie_transcript matches its title_sel against these values.
movies_df.title.values

array(['101 Dalmatians (1996)', '102 Dalmatians', "A Bug's Life",
       'A Goofy Movie', 'Aladdin (1992)', 'Aladdin (2019)',
       'Aladdin and the King of Thieves', 'Angels in the Outfield (1994)',
       'Atlantis: The Lost Empire', 'Beauty and the Beast (1991)',
       'Beauty and the Beast: The Enchanted Christmas',
       'Beverly Hills Chihuahua 2',
       'Beverly Hills Chihuahua 3: Viva la Fiesta!',
       'Beverly Hills Chihuahua', 'Big Hero 6', 'Brave', 'Brother Bear',
       'Buzz Lightyear of Star Command: The Adventure Begins', 'Cars 2',
       'Cars 3', 'Cars', 'Chicken Little (2005)',
       'Cinderella II: Dreams Come True', 'Coco (2017 film)', 'Dinosaur',
       "Doug's 1st Movie", 'Finding Dory', 'Finding Nemo',
       'Frankenweenie (1984)', 'Frozen (2013)', 'The Good Dinosaur',
       'Hannah Montana: The Movie', 'Heavyweights', 'Hercules (1997)',
       'High School Musical 3: Senior Year', 'Home on the Range (2004)',
       'Honey, I Shrunk the Kids', 'Incredibl

In [82]:
# Fetch the transcript text for one title; the function also prints the link it looked up.
text = get_movie_transcript(movies_df, "Aladdin (2019)")

/wiki/Aladdin_(2019)/Transcript


In [85]:
# Display the transcript text returned by get_movie_transcript.
print(text)

Omar: Hey, look, over there!
Lian: Wow! Their ship is so big.
Omar: Wish ours was that fancy.
Lian: I'd be so happy if ours was that fancy cause then...
Peddler: Why is that? 'Cause is looks better? This boat has seen us through many storms. It may not look like much but, it has something theirs never will
Omar: What? Wood rot and rats?
Dalia: Are the children learning something, dear?
Peddler: It is unclear. All right, sit, children. I think it's time that I told you the story of Aladdin, the Princess, and the lamp.
Lian: What's so special about a lamp?
Peddler: Oh, this is a magic lamp.
Lian: Maybe if you sing.
Omar: It's better when you sing.
Peddler: No, no. No singing. It's been a long day.
**Start song: Arabian night**

Peddler: Oh, imagine a land
It's a faraway place
Where the caravan camels roamWhere you wander among every culture and tongueIt's chaotic, but hey, it's home.When the wind's from the east and the sun's from the westAnd the sand in the glass is rightCome on down, s