# Import Libraries
### For web scrapping

the data will be taken from imdb website: https://www.imdb.com/search/title/?country_of_origin=id&sort=release_date,asc&start=1

## Import Libraries

In [346]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from requests import get
from bs4 import BeautifulSoup as Soup
from bs4 import NavigableString, Tag

## Get the site necessity 

In [407]:
url = get("https://www.imdb.com/search/title/?country_of_origin=id&sort=release_date,desc&start=51&ref_=adv_nxt")

In [409]:
request = url.text

### Parse the whole site 

In [410]:
total_soup = Soup(request,'html.parser')

### Get the part related to the Movie profiles 

In [411]:
total_movies = total_soup.findAll('div',{'class':'lister-item mode-advanced'})

### Find the number of possible pages that wee need to get all movies

#### In this case, it is 69, as the movie list has 3490 entry, and each page contains 50 movies.

In [412]:
total_movie = total_soup.find('div', {'class':'desc'}).find('span').text
page_count = total_movie[total_movie.find('of ')+3:total_movie.find(' titles')].replace(',','')
pages = int(float(page_count)/50)
print(pages)

69


## Script below is to get List of Movies 

In [426]:
Title = []
Year = []
Synopsis = []
Runtime = []
Genre = []
Rating = []
Actors =[]
Director = []
Votes = []

for pag in range(pages):
    page = (pag*50)+1
    url = "https://www.imdb.com/search/title/?country_of_origin=id&sort=release_date,desc&start={}&ref_=adv_nxt".format(page)
    request = get(url).text
    soup_data = Soup(request,'html.parser')
    movies = soup_data.findAll('div',{'class':'lister-item mode-advanced'})
    for mov in movies:
        #TITLE
        Title.append(mov.h3.a.text)

        #YEAR
        try:
            Year.append(mov.find('span',{"class": "lister-item-year text-muted unbold"}).text[1:5])
        except TypeError:
            Year.append(np.nan)
        except AttributeError:
            Year.append(np.nan)
            
        #SYNOPSIS
        try:
            Synopsis.append(mov.find_all('p',{"class": "text-muted"})[1].text[5:])
        except TypeError:
            Synopsis.append(np.nan)
        except AttributeError:
            Synopsis.append(np.nan)

        #RUNTIME
        try:
            Runtime.append(mov.find('span',{"class": "runtime"}).text[:-4])
        except AttributeError:
            Runtime.append(np.nan)
        except TypeError:
            Runtime.append(np.nan)

        #GENRE
        try:
            Genre.append(mov.find('span',{"class": "genre"}).text[1:-12])
        except AttributeError:
            Genre.append(np.nan)
            
        #RATING
        try:
            Rating.append(mov.find('div',{"class":"inline-block ratings-imdb-rating"})["data-value"])
        except AttributeError:
            Rating.append(np.nan)
        except TypeError:
            Rating.append(np.nan)

        #VOTES
        try:
            Votes.append(mov.find('span', {"name": "nv"}).text)
        except AttributeError:
            Votes.append(np.nan)
        except TypeError:
            Votes.append(np.nan)

        #ACTORS
        Actors.append([a.text for a in mov.find('p',class_='').find_all('a')[1:]])

        #DIRECTOR
        try:
            Director.append(mov.find('p',class_='').find_all('a')[0].text)
        except AttributeError:
            Director.append(np.nan)
        except IndexError:
            Director.append(np.nan)
#         print(i)

In [441]:
list=[Title, Year, Synopsis, Runtime, Genre, Rating, Votes, Director, Actors]
df = pd.DataFrame(list).T
df.columns = ["Title", "Year", "Synopses", "Runtime", "Genre", "Rating", "Votes", "Director", "Actors"]
df.head(200)

Unnamed: 0,Title,Year,Synopses,Runtime,Genre,Rating,Votes,Director,Actors
0,Lingering,Vide,a Plot\n,,Mystery,,,Dewi Nurfitri,"[Stephen J. Pena, Ryoma Ishizuka, Erin Evans-W..."
1,The Teacher,II),a Plot\n,,Comedy,,,Estelle Linden,"[Estelle Linden, Yova Gracia, Yuven]"
2,Setan Munafik,,Group of Bunch of youngster who like a shopist...,,"Horror, Mystery, Thriller",,,Yosua Rocky,"[Majed Salleh, Nabil Ahmad, Jalaluddin Hassan,..."
3,Esok Yang Lebih Baik,,a Plot\n,,"Action, Crime, Drama",,,Baf Sjamsuddin,"[Daniel Adnan, Yusof B. Mohd Kassim, Revaldo, ..."
4,Ghost in the Cell,,a Plot\n,,,,,Joko Anwar,[]
...,...,...,...,...,...,...,...,...,...
195,Payal Dev Feat. Radhika Bangia: Saara India!,2020,a Plot\n,3,"Short, Music",,,Mudassar Khan,"[Radhika Bangia, Payal Dev]"
196,The Mother's Land,II),a Plot\n,,"Short, Fantasy",,,Kevin Rahardjo,"[Mikael Farady, Sifra Magdalena]"
197,The Mother's Land,I) (,A tale of man who tries to captivate the mothe...,,"Short, Fantasy",,,Kevin Rahardjo,"[Mikael Farady, Sifra Magdalena]"
198,Ruwatan,2020,Accompanying her blind mother undergo alternat...,15,"Short, Drama",,,Ernest Lesmana,"[Vonny Anggraini, Johannes de Britto, Anne Yas..."


In [443]:
df[df["Director"] == "Joko Anwar"].sort_values(by="Year", ascending = False)

Unnamed: 0,Title,Year,Synopses,Runtime,Genre,Rating,Votes,Director,Actors
1038,Jenny,III),A woman gets challenged from a child to play B...,10.0,"Short, Horror",,,Joko Anwar,"[Asmara Abigail, M. Adhiyat]"
970,Don't Blink,II),a Plot\n,1.0,"Short, Horror",,,Joko Anwar,[Asmara Abigail]
323,Impetigore,2019,"Maya with her best friend, Dini, tries to surv...",106.0,"Drama, Horror, Mystery",6.7,2776.0,Joko Anwar,"[Tara Basro, Ario Bayu, Marissa Anita, Christi..."
357,Gundala,2019,Indonesia's preeminent comic book superhero an...,123.0,"Action, Adventure, Drama",6.3,3507.0,Joko Anwar,"[Abimana Aryasatya, Tara Basro, Bront Palarae,..."
830,Satan's Slaves,2017,After dying from a strange illness that she su...,107.0,"Drama, Horror, Mystery",6.6,7298.0,Joko Anwar,"[Tara Basro, Bront Palarae, Dimas Aditya, Endy..."
842,Jalanin Aja,2017,The ordinary journey turns out to be extraordi...,,"Short, Drama",,,Joko Anwar,[]
1237,A Copy of My Mind,2015,A female cheap salon worker and a pirated DVDs...,116.0,Drama,7.2,728.0,Joko Anwar,"[Tara Basro, Chicco Jerikho, Maera Panigoro, P..."
1636,Durable Love,2012,A single independent girl met a cute geek who ...,11.0,"Short, Drama, Romance",,,Joko Anwar,"[Karina Salim, Sadha Triyudha]"
1664,Grave Torture,2012,Mourning the death of his serial killer father...,9.0,"Short, Horror",6.9,39.0,Joko Anwar,"[Noboru Iguchi, Erik Matti, Ming Jin Woo, Isma..."
1710,Ritual,2012,A man must save his two children when they dis...,87.0,Thriller,5.5,2540.0,Joko Anwar,"[Rio Dewanto, Hannah Al Rashid, Izzi Isman, Ar..."
