In [49]:
import tqdm
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
import json
from sqlalchemy import create_engine
from pandas.io.json import json_normalize

# Collecting Data with Vimeo's API

## Goal

> Check which videos from Vimeo's staff picks are featured at the blog Motionographer.com.

## Endpoints

### Testing

* Generate an APP
* Vimeo's API: https://developer.vimeo.com/api/guides/start
* Token: https://developer.vimeo.com/apps/168643#personal_access_tokens
* API wrapper pip install PyVimeo

In [2]:
headers = {"Authorization": "Bearer 6d797fb7512534142b202cc24aaab742"}
endpoint = 'https://api.vimeo.com/tutorial'
response = requests.get(endpoint, headers=headers)

In [3]:
response.json()

{'message': 'Success! You just interacted with the Vimeo API. Your dev environment is configured correctly, and the client ID, client secret, and access token that you provided are all working fine.',
 'next_steps_link': 'https://developer.vimeo.com/api/guides/videos/upload',
 'token_is_authenticated': True}

### Staff picks' endpoint

In [89]:
headers = {"Authorization": "Bearer 6d797fb7512534142b202cc24aaab742"}
endpoint = f'https://api.vimeo.com/channels/staffpicks/videos?page={current_page}'
vimeo_page = requests.get(endpoint, headers=headers)

In [90]:
vimeo_page

<Response [504]>

* Write function to get all pages
* Get useful data (name, link, release_time, plays (?), tags, categories)

## Saving results

In [96]:
# cur_page = get_pages() # fazer um lambda aqui pra impressionar xP

page_df = pd.json_normalize(page_content['data'])
page_df['categories']

def get_page(current_page):    
    '''
    Takes current_page number. Returns dict.
    Return = {
        'paging': {
            'next': next page's uri or none
        }, 
        'data': [
            {},
            ...
        ]
    }
    '''
    
    headers = {"Authorization": "Bearer 6d797fb7512534142b202cc24aaab742"}
    endpoint = f'https://api.vimeo.com/channels/staffpicks/videos?page={current_page}'
    vimeo_page = requests.get(endpoint, headers=headers)
    page_content = vimeo_page.json()
    return page_content

def all_pages():
    vimeo_dataframe = pd.DataFrame()
    response = get_page(1)
    page_data = pd.json_normalize(response['data'])
    vimeo_dataframe.append(page_data)
    
    curr_page = response['paging']
    if curr_page['next'] != None:
        next_page = curr_page + 1
    else:
        break

{'total': 14184,
 'page': 1,
 'per_page': 25,
 'paging': {'next': '/channels/staffpicks/videos?page=2',
  'previous': None,
  'first': '/channels/staffpicks/videos?page=1',
  'last': '/channels/staffpicks/videos?page=568'},
 'data': [{'uri': '/videos/399740154',
   'name': 'THIS PERFECT DAY',
   'description': "Jules walks into a music store. This could be the day that changes their life. \n\nWorld Premiere at Tribeca Film Festival 2019.\nWinner of Rebel8 Outstanding Emerging Female Director Award at Flickerfest 2020.\nFeatured in 'Top 30 Short Films of 2019' by Close-Up Culture.\n\nJules: Michelle Keating\nJohn: Lee Mason\nRuby: Hannah Koch\n\nWriter/Director/Editor: Lydia Rui (@lydiaruihuang)\nProducer: Olivia Cheung\nExecutive Producer: Lily Chen\n\nDirector of Photography: Alice Stephens\nProduction Designer & Costume Designer: Eleanor Steiner\n1st AD: Kean Szczur\n\nSound Designer: Gunay Demirci\nComposer: Tommy Spender\nColourist: Abe Wynen\n\n1st AC: Cameron Stewart \n2nd AC: Je

In [95]:
page_df

Unnamed: 0,uri,name,description,type,link,duration,width,language,height,created_time,...,upload.form,upload.approach,upload.size,upload.redirect_url,transcode.status,metadata.connections.versions.uri,metadata.connections.versions.options,metadata.connections.versions.total,metadata.connections.versions.current_uri,metadata.connections.versions.resource_key
0,/videos/399740154,THIS PERFECT DAY,Jules walks into a music store. This could be ...,video,https://vimeo.com/lydiarui/thisperfectday,427,1920,en,1080,2020-03-22T23:24:37+00:00,...,,,,,complete,,,,,
1,/videos/399313424,FLUT by Malte Stein,A dirty lake bursts its banks and begins to fl...,video,https://vimeo.com/399313424,595,1920,,1080,2020-03-20T22:21:38+00:00,...,,,,,complete,,,,,
2,/videos/398515051,A HISTORY OF SILENCE: THE CINEMA OF LOIS WEBER,"In 1914, Lois Weber became one of the first fe...",video,https://vimeo.com/398515051,469,1920,en,800,2020-03-18T13:40:40+00:00,...,,,,,complete,,,,,
3,/videos/398394152,Raymonde or the Vertical Escape by Sarah Van D...,"Raymonde is really fed up with peas, aphids, d...",video,https://vimeo.com/papy3d/raymonde,1001,1920,fr,1080,2020-03-18T00:20:36+00:00,...,,,,,complete,/videos/398394152/versions,[GET],1.0,/videos/398394152/versions/301088521,b1bbfde1304f21ad786c785613a3929db4f04fd7
4,/videos/398365805,The Beep Test / La course navette,"""Wojtek is a mysterious new student at school....",video,https://vimeo.com/398365805,886,1920,,1080,2020-03-17T21:50:26+00:00,...,,,,,complete,,,,,
5,/videos/398324940,PLACE,"Wanting a fresh start, Lauren (Emily Green) mo...",video,https://vimeo.com/398324940,635,1920,en-US,1080,2020-03-17T19:20:43+00:00,...,,,,,complete,,,,,
6,/videos/397995833,Double Tap,A screen-obsessed teen ignores an instagram ch...,video,https://vimeo.com/397995833,173,1920,,1080,2020-03-16T15:26:23+00:00,...,,,,,complete,/videos/397995833/versions,[GET],1.0,/videos/397995833/versions/300641474,559af6d79370b842fccda4fb95fca73fd67596a7
7,/videos/397912933,JUTLAND II | Breath of the Seasons,Watch in 4K or HD with headphones or quality s...,video,https://vimeo.com/397912933,212,3840,,2160,2020-03-16T07:59:44+00:00,...,,,,,complete,,,,,
8,/videos/397673759,AUDI - ELECTRIC WAVE,DIRECTOR - DANIEL ASKILL\nEDITOR - LORIN ASKILL,video,https://vimeo.com/397673759,245,1920,,1080,2020-03-14T20:18:01+00:00,...,,,,,complete,,,,,
9,/videos/397171653,The Collector,The true cost for relief can be unexpected. \n...,video,https://vimeo.com/397171653,639,1920,,1080,2020-03-12T12:59:02+00:00,...,,,,,complete,,,,,


## Export json (to open in VSC)

In [None]:
with open('page_1.json', 'w') as page_1: 
    json.dump(pag1_vimeo, page_1)

# Web scraping Data from Motionographer

> Motionographer - curated motion design content: http://motionographer.com/

## Downloading the page

## Filter data (beautiful soup)

## Save results

# Results 

## Saving to Database