In [1]:
# Testando API YouTube
import json
import pandas as pd
import seaborn as sns
import numpy as np
import requests
import csv 
from datetime import datetime, timezone

import google_auth_oauthlib.flow
import googleapiclient.discovery
import googleapiclient.errors
from googleapiclient.discovery import build

In [2]:
# Colocando nossa chave da API do YouTube
import os
from getpass import getpass

api_key = getpass("Insira aqui sua API key do Youtube:")


In [3]:
# Função para buscar os vídeos em alta no YouTube e armazenar os dados em um DF
def videos_em_alta(n_videos: int, region: str):

    youtube = build('youtube', 'v3', developerKey=api_key)
    response = youtube.videos().list(
        part="snippet, statistics", 
        chart="mostPopular", 
        regionCode=region, 
        maxResults=n_videos,
            ).execute()
    videos = response.get("items", []) 

    #caso não há vídeos em alta exibirá a mensagem
    if not videos:
        print("Nenhum vídeo em alta foi encontrado! Ou informações não estão disponíveis")

    # dicionário para salvar as informações
    video_dict = {}

    for video in videos:
        

    # Extraindo as informações 
        video_id = video["id"]
        title = video["snippet"]["title"]
        views = video["statistics"]["viewCount"]
        likes = video["statistics"]["likeCount"]
        comments = video["statistics"]["commentCount"]
        published_at = video["snippet"]["publishedAt"]
        link_video = f"https://www.youtube.com/watch?v={video_id}"

        id = video["snippet"]['categoryId']
        response_cat = youtube.videoCategories().list(part="snippet",  id=id).execute()
        category = response_cat.get("items", [])[0]
        category_title = category["snippet"]["title"]

    # Armazenando as infos no dicionário
        video_dict[title] = {
            "views": views,
            "likes": likes,
            "comentarios": comments,
            "publicacao": published_at,
            "link_video": link_video,
            "categoria": category_title
        }
    # Salvando os dados em um DataFrame
    df = pd.DataFrame.from_dict(video_dict, orient="index")
    df = df.reset_index().rename(columns={'index': 'video'})

    # Convertendo a coluna 'publicacao' para datetime e criando uma nova coluna com o tempo postado
    current_time = datetime.now(timezone.utc)

    df['publicacao'] = pd.to_datetime(df['publicacao'])
    df["horas-publicacao"] = (current_time - df["publicacao"]).dt.total_seconds() / 3600
    df["horas-publicacao"] = df["horas-publicacao"].round()
    return df


In [4]:
# função para fazer update diário dos novos vídeos
def update_df(df, df_up):
    df = pd.concat([df,df_up])
    return df.reset_index(drop=True)


In [5]:
# Abrindo e salvando o df em uma variável
df = videos_em_alta(10, 'BR')
df

Unnamed: 0,video,views,likes,comentarios,publicacao,link_video,categoria,horas-publicacao
0,Madame Teia | Trailer Oficial Dublado,1343330,21344,2586,2023-11-15 14:00:48+00:00,https://www.youtube.com/watch?v=yGouqVQ-wUw,Film & Animation,24.0
1,What If...? | Temporada 2 | Trailer Oficial Du...,218989,26235,776,2023-11-15 17:44:24+00:00,https://www.youtube.com/watch?v=EzkZiJR662M,Entertainment,21.0
2,Brasileiros me mandaram para Coreia do Norte,248198,45469,19753,2023-11-15 16:15:31+00:00,https://www.youtube.com/watch?v=rYyvT2o09FA,People & Blogs,22.0
3,BLACKPINK: A VR Encore – Official Teaser,566017,112144,4570,2023-11-15 22:00:10+00:00,https://www.youtube.com/watch?v=gExhbUo61n0,Music,16.0
4,Ep. 2 Corrida das Blogueiras 5: MAKE ARTÍSTICA...,478754,52759,4077,2023-11-14 22:00:11+00:00,https://www.youtube.com/watch?v=2uXVkzGHrBo,Entertainment,40.0
5,Drake - First Person Shooter ft. J Cole,2517756,306899,18151,2023-11-15 20:26:07+00:00,https://www.youtube.com/watch?v=Xty2gi5cMa8,Music,18.0
6,"Luísa Sonza, Tokischa - La Muerte",725460,37357,2230,2023-11-14 21:00:08+00:00,https://www.youtube.com/watch?v=BC9pBqO94mM,Music,41.0
7,REPÓRTER DOIDÃO | ENEM 2023,1028462,127342,4876,2023-11-14 12:00:48+00:00,https://www.youtube.com/watch?v=uw-zFtUQwOo,Comedy,50.0
8,"Agora é OFICIAL, nos MUDAMOS pra CASA nova! - ...",199768,27502,568,2023-11-14 21:00:07+00:00,https://www.youtube.com/watch?v=TmwGbuX_drA,Howto & Style,41.0
9,"Tchau Obrigado 2 - MC Ryan SP, Kadu, IG, MC GP...",1364045,75090,4100,2023-11-13 22:00:10+00:00,https://www.youtube.com/watch?v=8dfSth3DrO4,Music,64.0


In [6]:
df_us = videos_em_alta(10, 'US')


In [7]:

df = update_df(df, df_us)

In [8]:
df

Unnamed: 0,video,views,likes,comentarios,publicacao,link_video,categoria,horas-publicacao
0,Madame Teia | Trailer Oficial Dublado,1343330,21344,2586,2023-11-15 14:00:48+00:00,https://www.youtube.com/watch?v=yGouqVQ-wUw,Film & Animation,24.0
1,What If...? | Temporada 2 | Trailer Oficial Du...,218989,26235,776,2023-11-15 17:44:24+00:00,https://www.youtube.com/watch?v=EzkZiJR662M,Entertainment,21.0
2,Brasileiros me mandaram para Coreia do Norte,248198,45469,19753,2023-11-15 16:15:31+00:00,https://www.youtube.com/watch?v=rYyvT2o09FA,People & Blogs,22.0
3,BLACKPINK: A VR Encore – Official Teaser,566017,112144,4570,2023-11-15 22:00:10+00:00,https://www.youtube.com/watch?v=gExhbUo61n0,Music,16.0
4,Ep. 2 Corrida das Blogueiras 5: MAKE ARTÍSTICA...,478754,52759,4077,2023-11-14 22:00:11+00:00,https://www.youtube.com/watch?v=2uXVkzGHrBo,Entertainment,40.0
5,Drake - First Person Shooter ft. J Cole,2517756,306899,18151,2023-11-15 20:26:07+00:00,https://www.youtube.com/watch?v=Xty2gi5cMa8,Music,18.0
6,"Luísa Sonza, Tokischa - La Muerte",725460,37357,2230,2023-11-14 21:00:08+00:00,https://www.youtube.com/watch?v=BC9pBqO94mM,Music,41.0
7,REPÓRTER DOIDÃO | ENEM 2023,1028462,127342,4876,2023-11-14 12:00:48+00:00,https://www.youtube.com/watch?v=uw-zFtUQwOo,Comedy,50.0
8,"Agora é OFICIAL, nos MUDAMOS pra CASA nova! - ...",199768,27502,568,2023-11-14 21:00:07+00:00,https://www.youtube.com/watch?v=TmwGbuX_drA,Howto & Style,41.0
9,"Tchau Obrigado 2 - MC Ryan SP, Kadu, IG, MC GP...",1364045,75090,4100,2023-11-13 22:00:10+00:00,https://www.youtube.com/watch?v=8dfSth3DrO4,Music,64.0


In [9]:
# converendo as colunas 'views', 'likes', 'comentarios' para int
df['views'] = df['views'].astype(int)
df['likes'] = df['likes'].astype(int)
df['comentarios'] = df['comentarios'].astype(int)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 8 columns):
 #   Column            Non-Null Count  Dtype              
---  ------            --------------  -----              
 0   video             20 non-null     object             
 1   views             20 non-null     int32              
 2   likes             20 non-null     int32              
 3   comentarios       20 non-null     int32              
 4   publicacao        20 non-null     datetime64[ns, UTC]
 5   link_video        20 non-null     object             
 6   categoria         20 non-null     object             
 7   horas-publicacao  20 non-null     float64            
dtypes: datetime64[ns, UTC](1), float64(1), int32(3), object(3)
memory usage: 1.1+ KB


In [10]:
# calculando a quantidade de views por hora
df["views/hour"] = (df["views"]/df["horas-publicacao"]).round(2)
# excluindo os dados duplicados
df = df.drop_duplicates(subset='video').reset_index(drop=True)
df

Unnamed: 0,video,views,likes,comentarios,publicacao,link_video,categoria,horas-publicacao,views/hour
0,Madame Teia | Trailer Oficial Dublado,1343330,21344,2586,2023-11-15 14:00:48+00:00,https://www.youtube.com/watch?v=yGouqVQ-wUw,Film & Animation,24.0,55972.08
1,What If...? | Temporada 2 | Trailer Oficial Du...,218989,26235,776,2023-11-15 17:44:24+00:00,https://www.youtube.com/watch?v=EzkZiJR662M,Entertainment,21.0,10428.05
2,Brasileiros me mandaram para Coreia do Norte,248198,45469,19753,2023-11-15 16:15:31+00:00,https://www.youtube.com/watch?v=rYyvT2o09FA,People & Blogs,22.0,11281.73
3,BLACKPINK: A VR Encore – Official Teaser,566017,112144,4570,2023-11-15 22:00:10+00:00,https://www.youtube.com/watch?v=gExhbUo61n0,Music,16.0,35376.06
4,Ep. 2 Corrida das Blogueiras 5: MAKE ARTÍSTICA...,478754,52759,4077,2023-11-14 22:00:11+00:00,https://www.youtube.com/watch?v=2uXVkzGHrBo,Entertainment,40.0,11968.85
5,Drake - First Person Shooter ft. J Cole,2517756,306899,18151,2023-11-15 20:26:07+00:00,https://www.youtube.com/watch?v=Xty2gi5cMa8,Music,18.0,139875.33
6,"Luísa Sonza, Tokischa - La Muerte",725460,37357,2230,2023-11-14 21:00:08+00:00,https://www.youtube.com/watch?v=BC9pBqO94mM,Music,41.0,17694.15
7,REPÓRTER DOIDÃO | ENEM 2023,1028462,127342,4876,2023-11-14 12:00:48+00:00,https://www.youtube.com/watch?v=uw-zFtUQwOo,Comedy,50.0,20569.24
8,"Agora é OFICIAL, nos MUDAMOS pra CASA nova! - ...",199768,27502,568,2023-11-14 21:00:07+00:00,https://www.youtube.com/watch?v=TmwGbuX_drA,Howto & Style,41.0,4872.39
9,"Tchau Obrigado 2 - MC Ryan SP, Kadu, IG, MC GP...",1364045,75090,4100,2023-11-13 22:00:10+00:00,https://www.youtube.com/watch?v=8dfSth3DrO4,Music,64.0,21313.2


In [11]:
# Data de hoje 
import datetime

date_today = datetime.date.today()
date_today


datetime.date(2023, 11, 16)

In [12]:
# salvando o arquivo em um csv

df.to_csv(f'dados/top_10_youtube_{date_today}.csv', sep=';',index=False)