In [277]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import csv
import numpy as np
import time
from tqdm import tqdm_notebook as tqdm
import re
from urllib.request import Request, urlopen

# Scrape Goodreads

In [278]:
# Scrape the Goodreads "Best Books of the 21st Century" list.
# For every row of each list page, follow the book link and collect the title,
# author, description, cover image, page count and genres, then write all
# collected rows to books.csv.
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests.get can block forever

number_of_pages = list(range(1,3))
all_books = []

for number in number_of_pages:
    # Fetch the list page; a network/HTTP failure skips this page instead of
    # aborting the whole scrape.
    try:
        page = requests.get(f'https://www.goodreads.com/list/show/7.Best_Books_of_the_21st_Century?page={number}',
                            timeout=REQUEST_TIMEOUT)
        page.raise_for_status()
    except requests.RequestException as exc:
        print(f'page {number} could not be fetched ({exc}); skipping')
        continue

    soup = BeautifulSoup(page.content, 'html.parser')
    table = soup.find('table', attrs={'class': 'tableList js-dataTooltip'})
    if table is None:
        # soup.find returns None when the table is absent; calling find_all on
        # it would raise AttributeError.
        print(f'page {number} has no book table; skipping')
        continue
    table_body = table.find_all('tr')

    for entry in table_body:
        try:
            hrefs = entry.find_all(href=True)

            # the first link in the row carries the book title and URL
            title = hrefs[0]['title']

            # the third link in the row is read as the author
            author = hrefs[2].get_text()

            # relative link to the book's own page
            link = hrefs[0]['href']

            # load the book page to read the remaining fields
            page2 = requests.get(f'https://www.goodreads.com{link}', timeout=REQUEST_TIMEOUT)
            soup2 = BeautifulSoup(page2.content, 'html.parser')

            # description box; the second <span> inside it is taken as the summary
            description_box = soup2.find_all('div', attrs={'id': 'description'})
            book_text = description_box[0].find_all('span')[1].get_text()

            # cover image URL
            image_box = soup2.find_all('img', attrs={'id' : 'coverImage'})
            picture = image_box[0]['src']

            # number of pages (kept as the raw text of the element)
            page_box = soup2.find_all('span', attrs={'itemprop':'numberOfPages'})
            page_count = page_box[0].get_text()

            # genres
            genre_box = soup2.find_all('a', attrs={'class':'actionLinkLite bookPageGenreLink'})
            genre_list = [genre.get_text() for genre in genre_box]

            # one record per book; keys match the CSV header below
            row_profile = {'name':title, 'author':author, 'link':link, 'text':book_text,
                           'picture':picture, 'page number':page_count, 'genres':genre_list}

            all_books.append(row_profile)

            print(title)
        except Exception as exc:
            # Best effort: a row that is missing any field is skipped. Catching
            # Exception (not a bare except) lets a kernel interrupt stop the
            # loop, and the message makes silent data loss visible.
            print(f'skipped an entry on page {number}: {type(exc).__name__}: {exc}')
    print(f'page {number} is done!')

# newline='' is required by the csv module to avoid blank rows on Windows;
# utf-8 keeps non-ASCII text from raising UnicodeEncodeError on platforms whose
# default encoding is narrower. The with-block closes the file even on error.
with open("books.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=['name', 'author', 'link', 'text', 'picture', 'page number', 'genres'])
    writer.writeheader()
    writer.writerows(all_books)

Harry Potter and the Deathly Hallows
The Hunger Games
The Kite Runner
The Book Thief
Harry Potter and the Half-Blood Prince
Harry Potter and the Order of the Phoenix
The Help
A Thousand Splendid Suns
Catching Fire
The Girl with the Dragon Tattoo
The Time Traveler's Wife
The Fault in Our Stars
The Da Vinci Code
The Road
Water for Elephants
The Curious Incident of the Dog in the Night-Time
Mockingjay
The Lovely Bones
Middlesex
Divergent
Atonement
The Lightning Thief
The Shadow of the Wind
City of Bones
American Gods
The Secret Life of Bees
Extremely Loud and Incredibly Close
The Glass Castle
My Sister's Keeper
Angels & Demons
Never Let Me Go
The Girl Who Played with Fire
Cloud Atlas
The Host
Breaking Dawn
The Guernsey Literary and Potato Peel Pie Society
Eclipse
The Girl Who Kicked the Hornet's Nest
Eat, Pray, Love
New Moon
City of Glass
Kafka on the Shore
The Name of the Wind
The Graveyard Book
Room
City of Ashes
Freakonomics: A Rogue Economist Explores the Hidden Side of Everything
The

# Scrape Flixable Netflix Movies

In [279]:
# Scrape Netflix movies from Flixable.
# Walk the paginated movie listing, follow each poster link to the title's own
# page, collect its details, and write all collected rows to movie.csv.
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests.get can block forever

page_iterations = list(range(1,100))
all_movies = []

for iteration_count in page_iterations:

    # Fetch the listing page; a network/HTTP failure skips this page instead of
    # aborting the whole scrape.
    try:
        page = requests.get(f'https://flixable.com/genre/movies/?min-rating=0&min-year=1920&max-year=2020&order=date&page={iteration_count}',
                            timeout=REQUEST_TIMEOUT)
        page.raise_for_status()
    except requests.RequestException as exc:
        print(f'listing page {iteration_count} could not be fetched ({exc}); skipping')
        continue

    soup = BeautifulSoup(page.content, 'html.parser')
    # each matching div is one title card on the listing page
    table = soup.find_all('div', attrs={'class': 'col-sm-6 col-lg-3 mb-3'})

    for entry in table:
        try:
            href = entry.find('a', attrs={'class':'poster-link'})['href']
            page2 = requests.get(f'https://flixable.com{href}', timeout=REQUEST_TIMEOUT)
            soup2 = BeautifulSoup(page2.content, 'html.parser')

            # title
            title = soup2.find('h1', attrs={'class':'mb-3'}).get_text().strip()
            # poster image URL
            image = soup2.find('img', attrs={'class':'poster mb-4'})['src']
            # year
            year = soup2.find('span', attrs={'class':'mr-2'}).get_text()

            # first 'mb-2' div on the page; its <span>s are read positionally
            row = soup2.find('div', attrs={'class':'mb-2'})
            movie_traits = row.find_all('span')
            # release year
            # NOTE(review): may be the same element as `year` above — confirm against the page markup
            release_year = movie_traits[0].get_text()
            # rating
            rating = movie_traits[1].get_text()
            # runtime
            runtime = movie_traits[2].get_text()

            movie_box = soup2.find('div', attrs={'class':'col-md-8'})
            # movie plot
            plot = movie_box.find('div', attrs={'class':'mb-3'}).get_text().strip()
            # movie genres: drop the label, commas, '&' and the word 'Movies',
            # then split on whitespace (multi-word genres become separate tokens)
            genres = movie_box.find_all('div', attrs={'class':'mb-2'})[1].get_text()
            movie_genres = genres.replace('Genres:','').replace(',','').replace('Movies','').replace('&','').strip().split()

            # one record per movie; keys match the CSV header below
            full_movie_entry = {'title':title, 'image':image, 'year':year,
                                'release_year':release_year, 'rating':rating, 'runtime':runtime,
                                'plot':plot, 'genres':movie_genres}

            all_movies.append(full_movie_entry)
            print(f'{title} is done')
        except Exception as exc:
            # Best effort: a title that is missing any field is skipped.
            # Catching Exception (not a bare except) lets a kernel interrupt
            # stop the loop, and the message makes silent data loss visible.
            print(f'skipped an entry on page {iteration_count}: {type(exc).__name__}: {exc}')

# newline='' is required by the csv module to avoid blank rows on Windows;
# utf-8 keeps non-ASCII titles from raising UnicodeEncodeError on platforms
# whose default encoding is narrower. The with-block closes the file even on error.
with open("movie.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=['title', 'image', 'year', 'release_year', 'rating', 'runtime', 'plot', 'genres'])
    writer.writeheader()
    writer.writerows(all_movies)

A Ghost Story is done
Live Twice, Love Once is done
The Tourist is done
All the Freckles in the World is done
Deewangee is done
Ferris Bueller's Day Off is done
Half Girlfriend is done
21 is done
A Cinderella Story is done
Alpha and Omega: The Legend of the Saw Tooth Cave is done
American Beauty is done
Arthur Christmas is done
Aruna & Her Palate is done
As Good as It Gets is done
Bad Grandpa .5 is done
Breaking the Bank is done
Catch Me If You Can is done
Center Stage is done
Charlie and the Chocolate Factory is done
Chasing Amy is done
Chitty Chitty Bang Bang is done
Chloe is done
City of God is done
Click is done
Cloverfield is done
Coach Carter is done
Dinner for Schmucks is done
Donnie Brasco is done
Dragonheart is done
Dragonheart 3: The Sorcerer is done
Dragonheart: A New Beginning is done
Event Horizon is done
Fall Girls is done
Final Destination is done
Final Destination 2 is done
Final Destination 3 is done
Free Willy is done
Friday the 13th is done
Garfield Gets Real is done

True: Winter Wishes is done
Pranaam is done
What the F* Is Going On? is done
Awake is done
Popeye is done
Shot Caller is done
Charlotte's Web is done
End of Watch is done
Hamburger Hill is done
The Time Machine is done
Brother is done
Girls und Panzer der Film is done
Mike Birbiglia: What I Should Have Said Was Nothing: Tales from My Secret Public Journal is done
Shelby American is done
K-19: The Widowmaker is done
The Knight Before Christmas is done
Adore is done
Be Somebody is done
Beavis and Butt-head Do America is done
Bebe's Kids is done
Bikram: Yogi, Guru, Predator is done
Black Snake Moan is done
Carriers is done
Dorasaani is done
Dream/Killer is done
Evolution is done
Eye for an Eye is done
Hot Rod is done
It Takes Two is done
Lorena, Light-Footed Woman is done
Love, Rosie is done
Mallesham is done
Once Upon a Time in the West is done
She's Out of My League is done
Superstar is done
The Adventures of Tintin is done
The Devil Inside is done
The First Wives Club is done
The Gift 

Head Count is done
Hello, Privilege. It's Me, Chelsea is done
Locked on You is done
Shanghai Fortress is done
Tall Girl is done
Time Trap is done
Turbo is done
Bill Burr: Paper Tiger is done
Eat Pray Love is done
Evelyn is done
Our Godfather is done
Norm of the North: King Sized Adventure is done
Article 15 is done
Care of Kancharapalem is done
Ee Nagaraniki Emaindi is done
Kill Me If You Dare is done
American Factory: A Conversation with the Obamas is done
Saawan is done
The Heretics is done
The World We Make is done
Watchman is done
68 Kill is done
American Psycho is done
Elena is done
First Kill is done
For the Birds is done
Igor is done
Mokalik (Mechanic) is done
October 1 is done
Olmo & the Seagull is done
Phone Swap is done
Rachel Getting Married is done
Spookley the Square Pumpkin is done
Stripes is done
The Bridge is done
The CEO is done
The Last Exorcism is done
The Saint is done
The Taking of Pelham 123 is done
Uncle Naji in UAE is done
Used Goods is done
You Don't Mess with 

#Selfie is done
#Selfie 69 is done
50/50 is done
III Smoking Barrels is done
Life in the Doghouse is done
Love Is a Story is done
Oh, Ramona! is done
Prosecuting Evil: The Extraordinary World of Ben Ferencz is done
Righteous Kill is done
Satan & Adam is done
Small Soldiers is done
The Space Between Us is done
Then Came You is done
We Are Legends is done
Always Be My Maybe is done
C Kkompany is done
Chopsticks is done
Koi Aap Sa is done
Krishna Cottage is done
Mere Pyare Prime Minister is done
P is done
Pegasus is done
Ragini MMS is done
Ragini MMS 2 is done
Shor In the City is done
My Week with Marilyn is done
Svaha: The Sixth Finger is done
The One I Love is done
Lifechanger is done
The Nutcracker and the Four Realms is done
After Porn Ends is done
Rampant is done
After Maria is done
Joy is done
Rim of the World is done
The Perfection is done
The Lonely Island Presents: The Unauthorized Bash Brothers Experience is done
A Tale of Two Kitchens is done
The Man Who Feels No Pain is done
M

Hajwala 2: Mysterious Mission is done
Juanita is done
Lady J is done
The Imaginarium of Doctor Parnassus is done
The Jane Austen Book Club is done
Walk. Ride. Rodeo. is done
Doubt is done
Nappily Ever After is done
Sisterakas is done
Starting Over Again is done
That Thing Called Tadhana is done
The Love Affair is done
Bedtime Stories is done
Caregiver is done
Christopher Robin is done
The Hard Way is done
Everyday I Love You is done
I Am is done
Kid Kulafu is done
My Only Mother is done
No Other Woman is done
The Dawn Wall is done
Exit Strategy is done
Middle School: The Worst Years of My Life is done
Contract is done
Dear Dad is done
River's Edge is done
Sarkar is done
Sarkar is done
Saving Mr. Banks is done
Wakefield is done
War Chhod Na Yaar is done
Weapon of Choice is done
Zubaan is done
2,215 is done
An Interview with God is done
BNK48: Girls Don't Cry is done
Budapest is done
Christmas in the Smokies is done
Crouching Tiger, Hidden Dragon is done
Dead in a Week (Or Your Money Bac

Black Mirror: Bandersnatch is done
Life Ki Toh Lag Gayi is done
Oasis: Supersonic is done
Rehmataan is done
The Birth Reborn 3 is done
When Angels Sleep is done
At First Light is done
Avengers: Infinity War is done
Backtrack is done
The Little Hours is done
I Am Vengeance is done
Mr. Church is done
Bad Seeds is done
Bird Box is done
Struggle: The Life and Lost Art of Szukalski is done
The Last Hangover is done
23:59 is done
Aggretsuko: We Wish You a Metal Christmas is done
Imperfect is done
Lang Tong is done
Miss J Contemplates Her Choice is done
Ascharyachakit! is done
The Humanity Bureau is done
Lilli is done
Prince of Peoria: A Christmas Moose Miracle is done
Skin Trade is done
American Circumcision is done
Andhadhun is done
Dragons: Dawn of the Dragon Racers is done
Kill the Messenger is done
Maps to the Stars is done
One Day is done
Springsteen on Broadway is done
The Theory of Everything is done
F.R.E.D.I. is done
Kita Kita is done
Once in a Lifetime Sessions with George Ezra is 

Delhi 6 is done
Dev.D is done
Dil Vil Pyaar Vyaar is done
Dushman is done
Ek Main Aur Ekk Tu is done
Fishtail is done
Freedom at Midnight is done
Haani is done
Happy Go Lucky is done
Harishchandrachi Factory is done
Jagga Jasoos is done
Jatt James Bond is done
Jodhaa Akbar is done
Judge Singh LLB is done
Kaminey is done
Kevin Hart: Laugh at My Pain is done
Kevin Hart: Seriously Funny is done
Khido Khundi is done
Khoobsurat is done
Kisaan is done
Kismat Konnection is done
Kurbaan is done
Lock is done
Luv Shuv Tey Chicken Khurana is done
Main, Meri Patni Aur Woh is done
Manje Bistre is done
Motor Mitraan Di is done
Mountain is done
Mugamoodi is done
Muran is done
My Friend Pinto is done
My Little Pony Equestria Girls: Forgotten Friendship is done
My Little Pony Equestria Girls: Rollercoaster of Friendship is done
My Suicide is done
Naan Sigappu Manithan is done
Nasha is done
Naughty Jatts is done
Needhi Singh is done
Peace Haven is done
Phantom is done
Pizza is done
Pooja Kiven Aa is don

Locke is done
Lucha: Playing the Impossible is done
Spring Breakers is done
Stray Bullet is done
Tahaan is done
Tango Feroz is done
Temporada de Caza is done
The Adderall Diaries is done
The Spectacular Now is done
Under The Skin is done
Colonia is done
How It Ends is done
Jim Jefferies: This Is Me Now is done
Kevin Hart: Let Me Explain is done
Romina is done
The Signal is done
Us and Them is done
100 Days Of Solitude is done
Luciano Mellera: Infantiloide is done
The Legacy of a Whitetail Deer Hunter is done
The Skin of the Wolf is done
White Fang is done
Blue Valentine is done
Sommore: Chandelier Status is done
All the Boys Love Mandy Lane is done
Felipe Esparza: They're Not Going to Laugh at You is done
Joan Rivers: Don't Start with Me is done
Katt Williams: Kattpacalypse is done
Dance Academy: The Comeback is done
King of Peking is done
Angamaly Diaries is done
Anthony Kaun Hai? is done
Bo Burnham: what. is done
Cappuccino is done
Chalte Chalte is done
Chicken Kokkachi is done
Count

Martyrs of Marriage is done
Pizza, birra, faso is done
Ranbhool is done
Satrangi Re is done
Sthaniya Sambaad is done
The Art of Loving is done
Steel Rain is done
Ricky Gervais: Humanity is done
Stretch Armstrong: The Breakout is done
Kygo: Live at the Hollywood Bowl is done
Troy: The Odyssey is done
Bad Match is done
Bitch is done
Nails is done
Septiembre, un llanto en silencio is done
Meditation Park is done
The Outsider is done
Ladies First is done
Aftershock is done
Bullet Head is done
Aliens Ate My Homework is done
Benji is done
Benji: Off the Leash is done
For the Love of Benji is done
Gad Elmaleh: American Dream is done
Gook is done
F the Prom is done
Expedition China is done
Malena Pichot: Estupidez compleja is done
Natalia Valdebenito: El especial is done
Ravenous is done
2307: Winter's Dream is done
Adel Karam: Live from Beirut is done
Ai Weiwei: Never Sorry is done
Aitraaz is done
Bachelor Girls is done
Balu Mahi is done
Battle Drone is done
Boom is done
Company is done
Death

Chasing Trane is done
Long Time Running is done
Bon Cop Bad Cop 2 is done
Bill Burr: Let It Go is done
Mea Culpa is done
Natalia Valdebenito: Gritona is done
The Killer is done
Jerry Seinfeld: Comedian is done
The Journey Is the Destination is done
Let It Fall: Los Angeles 1982-1992 is done
Mubarakan is done
Williams is done
6-5=2 is done
Bandie is done
Beiimaan Love is done
Coffee with D is done
Getaway Plan is done
Golden Time is done
Hajwala: The Missing Engine is done
Hitler's Steel Beast is done
Kacche Dhaagey is done
Kis Kisko Pyaar Karoon is done
Moor is done
One Heart: The A.R. Rahman Concert Film is done
Pettersson and Findus 2 is done
Punjab 1984 is done
Rivaaz is done
Romeo Ranjha is done
Roots is done
Savage Dog is done
The Bittersweet is done
Trip to Bhangarh: Asia's Most Haunted Place is done
Undercover Grandpa is done
Waarrior Savitri is done
Warehoused is done
Yaara O Dildaara is done
Judah Friedlander: America Is the Greatest Country in the United States is done
The B-

Anarkali of Aarah is done
Annabelle Hooper and the Ghosts of Nantucket is done
Below Her Mouth is done
Cop Watchers is done
Handsome Devil is done
Jessica Darling's It List is done
Maz Jobrani: Immigrant is done
Opening Night is done
Raising the Bar is done
The Blind Christ is done
The Bomb is done
The Truth About Alcohol is done
The Women Who Kill Lions is done
Un plus une is done
Loco Love is done
Diana: 7 Days That Shook the World is done
The Incredible Jessica James is done
Joe Mande’s Award-Winning Comedy Special is done
I Called Him Morgan is done
Victor is done
City of Tiny Lights is done
Houston, We Have a Problem! is done
Aditi Mittal: Things They Wouldn't Let Me Say is done
Theater of Life is done
Uncertain Glory is done
Amar is done
Singh Saab the Great is done
Vitti Dandu is done
Buddy Thunderstruck: The Maybe Pile is done
Chasing Coral is done
To the Bone is done
D.L. Hughley: Clear is done
My Scientology Movie is done
Tom Segura: Completely Normal is done
Sex Doll is done

Deidra & Laney Rob a Train is done
Naledi: A Baby Elephant's Tale is done
Pandora is done
5 to 7 is done
Coraline is done
Howards End is done
The Hollow Point is done
The Riot Club is done
Asoka is done
Bon Cop, Bad Cop is done
Les Bleus - Une autre histoire de France, 1996-2016 is done
Namour is done
Notes on Blindness is done
One 2 Ka 4 is done
Ottaal is done
Sunrise is done
Angry Indian Goddesses is done
Jim Norton: Mouthful of Shame is done
100 Meters is done
Bana Masal Anlatma is done
Berlin Kaplani is done
Burning Sands is done
Çarsi Pazar is done
Çok Filim Hareketler Bunlar is done
Dedemin Fisi is done
Deliha is done
Düğün Dernek is done
Dügün Dernek 2: Sünnet is done
Eyyvah Eyyvah is done
Eyyvah Eyyvah 2 is done
Eyyvah Eyyvah 3 is done
G.O.R.A is done
Hayat Öpücügü is done
Hokkabaz is done
Hükümet Kadin is done
Hükümet Kadin 2 is done
Incir Receli 2 is done
Kara Bela is done
Kocan Kadar Konus is done
Kocan Kadar Konus 2: Dirilis is done
Kurtulus Son Durak is done
Mucize is done

Raiders!: The Story of the Greatest Fan Film Ever Made is done
The White Helmets is done
Natural Born Pranksters is done
Sample This is done
Extremis is done
Jandino: Whatever it Takes is done
Elstree 1976 is done
A Noble Intention is done
Bon Bini Holland is done
Dukhtar is done
Figaro Pho is done
Full Out is done
Holy Hell is done
Off Course is done
Saudi Arabia Uncovered is done
We Need to Talk is done
Blue Is the Warmest Color is done
Jeff Foxworthy and Larry the Cable Guy: We’ve Been Thinking... is done
XOXO is done
Abzurdah is done
Dhanak is done
I'll Sleep When I'm Dead is done
Being AP is done
John & Jane is done
Real Crime: Supermarket Heist (Tesco Bomber) is done
13 Cameras is done
Jim Gaffigan: Beyond the Pale is done
Jim Gaffigan: King Baby is done
Jim Gaffigan: Mr. Universe is done
Jim Gaffigan: Obsessed is done
David Cross: Making America Great Again! is done
The Little Prince is done
Lavell Crawford: Can a Brother Get Some Love? is done
APEX: The Story of the Hypercar is

# Scrape Flixable Netflix TV Shows

In [280]:
# Scrape Netflix TV shows from Flixable.
# Walk the paginated TV-show listing, follow each poster link to the show's own
# page, collect its details, and write all collected rows to tv.csv.
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests.get can block forever

page_iterations = list(range(1,49))
all_tv = []

for iteration_count in page_iterations:

    # Fetch the listing page; a network/HTTP failure skips this page instead of
    # aborting the whole scrape.
    try:
        page = requests.get(f'https://flixable.com/genre/tv-shows/?min-rating=0&min-year=1920&max-year=2020&order=date&page={iteration_count}',
                            timeout=REQUEST_TIMEOUT)
        page.raise_for_status()
    except requests.RequestException as exc:
        print(f'listing page {iteration_count} could not be fetched ({exc}); skipping')
        continue

    soup = BeautifulSoup(page.content, 'html.parser')
    # each matching div is one title card on the listing page
    table = soup.find_all('div', attrs={'class': 'col-sm-6 col-lg-3 mb-3'})

    for entry in table:
        try:
            href = entry.find('a', attrs={'class':'poster-link'})['href']
            page2 = requests.get(f'https://flixable.com{href}', timeout=REQUEST_TIMEOUT)
            soup2 = BeautifulSoup(page2.content, 'html.parser')

            # title
            title = soup2.find('h1', attrs={'class':'mb-3'}).get_text().strip()
            # poster image URL
            image = soup2.find('img', attrs={'class':'poster mb-4'})['src']
            # year
            year = soup2.find('span', attrs={'class':'mr-2'}).get_text()

            # first 'mb-2' div on the page; its <span>s are read positionally
            row = soup2.find('div', attrs={'class':'mb-2'})
            movie_traits = row.find_all('span')
            # release year
            # NOTE(review): may be the same element as `year` above — confirm against the page markup
            release_year = movie_traits[0].get_text()
            # rating
            rating = movie_traits[1].get_text()
            # runtime
            runtime = movie_traits[2].get_text()

            movie_box = soup2.find('div', attrs={'class':'col-md-8'})
            # show plot
            plot = movie_box.find('div', attrs={'class':'mb-3'}).get_text().strip()
            # show genres: drop the label, commas, '&' and the word 'Movies',
            # then split on whitespace (multi-word genres become separate tokens)
            genres = movie_box.find_all('div', attrs={'class':'mb-2'})[1].get_text()
            movie_genres = genres.replace('Genres:','').replace(',','').replace('Movies','').replace('&','').strip().split()

            # one record per show; keys match the CSV header below
            full_movie_entry = {'title':title, 'image':image, 'year':year,
                                'release_year':release_year, 'rating':rating, 'runtime':runtime,
                                'plot':plot, 'genres':movie_genres}

            all_tv.append(full_movie_entry)
            print(f'{title} is done')
        except Exception as exc:
            # Best effort: a show that is missing any field is skipped.
            # Catching Exception (not a bare except) lets a kernel interrupt
            # stop the loop, and the message makes silent data loss visible.
            print(f'skipped an entry on page {iteration_count}: {type(exc).__name__}: {exc}')

# newline='' is required by the csv module to avoid blank rows on Windows;
# utf-8 keeps non-ASCII titles from raising UnicodeEncodeError on platforms
# whose default encoding is narrower. The with-block closes the file even on error.
with open("tv.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=['title', 'image', 'year', 'release_year', 'rating', 'runtime', 'plot', 'genres'])
    writer.writeheader()
    writer.writerows(all_tv)

Cheer is done
Dracula is done
Go! Go! Cory Carson is done
Anne with an E is done
Sex, Explained is done
Thieves of the Wood is done
Because This Is My First Life is done
Border Security: America's Front Line is done
Cells at Work! is done
Chhota Bheem Kung Fu Dhamaka Series is done
Drugs, Inc. is done
Live Up To Your Name is done
Messiah is done
Mia and Me is done
Nisman: The Prosecutor, the President, and the Spy is done
Oddbods is done
Reply 1988 is done
Rurouni Kenshin is done
Saint Seiya is done
Save Me is done
The Bride of Habaek is done
The Circle is done
ARASHI's Diary -Voyage- is done
Good Girls is done
Occupied is done
Pucca: Love Recipe is done
Rainbow Ruby is done
The Degenerates is done
Alexa & Katie is done
The Gift is done
The Bonfire of Destiny is done
CAROLE & TUESDAY is done


KeyboardInterrupt: 