In [25]:
# Testando API YouTube
import json
import pandas as pd
import seaborn as sns
import numpy as np
import requests
import csv 
from datetime import datetime, timezone

import google_auth_oauthlib.flow
import googleapiclient.discovery
import googleapiclient.errors
from googleapiclient.discovery import build

In [2]:
# Colocando nossa chave da API do YouTube
import os
from getpass import getpass

api_key = getpass("Insira aqui sua API key da OpenIA:")


In [3]:
# Função para buscar os vídeos em alta no YouTube e armazenar os dados em um DF
def videos_em_alta(n_videos: int, region: str):

    youtube = build('youtube', 'v3', developerKey=api_key)
    response = youtube.videos().list(
        part="snippet, statistics", 
        chart="mostPopular", 
        regionCode=region, 
        maxResults=n_videos,
            ).execute()
    videos = response.get("items", []) 

    #caso não há vídeos em alta exibirá a mensagem
    if not videos:
        print("Nenhum vídeo em alta foi encontrado! Ou informações não estão disponíveis")

    # dicionário para salvar as informações
    video_dict = {}

    for video in videos:
        

    # Extraindo as informações 
        video_id = video["id"]
        title = video["snippet"]["title"]
        views = video["statistics"]["viewCount"]
        likes = video["statistics"]["likeCount"]
        comments = video["statistics"]["commentCount"]
        published_at = video["snippet"]["publishedAt"]
        link_video = f"https://www.youtube.com/watch?v={video_id}"

        id = video["snippet"]['categoryId']
        response_cat = youtube.videoCategories().list(part="snippet",  id=id).execute()
        category = response_cat.get("items", [])[0]
        category_title = category["snippet"]["title"]

    # Armazenando as infos no dicionário
        video_dict[title] = {
            "views": views,
            "likes": likes,
            "comentarios": comments,
            "publicacao": published_at,
            "link_video": link_video,
            "categoria": category_title
        }
    # Salvando os dados em um DataFrame
    df = pd.DataFrame.from_dict(video_dict, orient="index")
    df = df.reset_index().rename(columns={'index': 'video'})

    # Convertendo a coluna 'publicacao' para datetime e criando uma nova coluna com o tempo postado
    current_time = datetime.now(timezone.utc)

    df['publicacao'] = pd.to_datetime(df['publicacao'])
    df["horas-publicacao"] = (current_time - df["publicacao"]).dt.total_seconds() / 3600
    df["horas-publicacao"] = df["horas-publicacao"].round()
    return df


In [4]:
# função para fazer update diário dos novos vídeos
def update_df(df, df_up):
    df = pd.concat([df,df_up])
    return df.reset_index(drop=True)


In [5]:
# Abrindo e salvando o df em uma variável
df = videos_em_alta(10, 'BR')
df

Unnamed: 0,video,views,likes,comentarios,publicacao,link_video,categoria,horas-publicacao
0,Ep.1 Corrida das Blogueiras 5: COMBO DE VERÃO ...,346650,51131,4474,2023-11-07 22:59:09+00:00,https://www.youtube.com/watch?v=-_hCTBtn4aY,Entertainment,20.0
1,LIGA MFL: GRANDE FINAL 🔥| NOISE - CRIAS - FAZ ...,578355,49021,59,2023-11-08 02:43:25+00:00,https://www.youtube.com/watch?v=Ot7t2Bsx1jY,Gaming,16.0
2,"Ludmilla, Pabllo Vittar, Marina Sena, Duda Bea...",84384,4321,466,2023-11-08 02:52:15+00:00,https://www.youtube.com/watch?v=176sdftBgYA,Music,16.0
3,수진 (SOOJIN) '아가씨' Official MV,2258557,344675,26465,2023-11-08 09:00:27+00:00,https://www.youtube.com/watch?v=1p2cafgpD1M,Music,10.0
4,"Pedro Sampaio, Joelma, NXZero, Tati Quebra-Bar...",78158,4995,876,2023-11-08 01:39:56+00:00,https://www.youtube.com/watch?v=c9iPhnDjbxo,Music,17.0
5,Luísa Sonza canta 'Chico' e 'La Muerte' no Prê...,87565,4803,294,2023-11-08 04:21:38+00:00,https://www.youtube.com/watch?v=iNKj4BVtiAw,Music,15.0
6,BASTIDORES | VASCO 1 X 0 BOTAFOGO | VASCOTV,128003,19034,645,2023-11-08 00:30:10+00:00,https://www.youtube.com/watch?v=C03HDhgCeqw,Sports,18.0
7,EPISÓDIO 08 - 2/5: Semifinal EMOCIONANTE | TEM...,320200,9793,513,2023-11-08 08:01:00+00:00,https://www.youtube.com/watch?v=pLoAhwLjAB0,Entertainment,11.0
8,ZANELLA E EMANUELLY… O QUE É ISSO?,239766,16513,671,2023-11-07 21:30:02+00:00,https://www.youtube.com/watch?v=Gs9DKhhSjew,Entertainment,21.0
9,Gabriela Rocha - Me Atraiu (Reimagined),123855,15675,1317,2023-11-07 20:00:24+00:00,https://www.youtube.com/watch?v=_UN2gwabRBI,Music,23.0


In [6]:
df_us = videos_em_alta(10, 'US')


In [7]:

df = update_df(df, df_us)

In [8]:
df

Unnamed: 0,video,views,likes,comentarios,publicacao,link_video,categoria,horas-publicacao
0,Ep.1 Corrida das Blogueiras 5: COMBO DE VERÃO ...,346650,51131,4474,2023-11-07 22:59:09+00:00,https://www.youtube.com/watch?v=-_hCTBtn4aY,Entertainment,20.0
1,LIGA MFL: GRANDE FINAL 🔥| NOISE - CRIAS - FAZ ...,578355,49021,59,2023-11-08 02:43:25+00:00,https://www.youtube.com/watch?v=Ot7t2Bsx1jY,Gaming,16.0
2,"Ludmilla, Pabllo Vittar, Marina Sena, Duda Bea...",84384,4321,466,2023-11-08 02:52:15+00:00,https://www.youtube.com/watch?v=176sdftBgYA,Music,16.0
3,수진 (SOOJIN) '아가씨' Official MV,2258557,344675,26465,2023-11-08 09:00:27+00:00,https://www.youtube.com/watch?v=1p2cafgpD1M,Music,10.0
4,"Pedro Sampaio, Joelma, NXZero, Tati Quebra-Bar...",78158,4995,876,2023-11-08 01:39:56+00:00,https://www.youtube.com/watch?v=c9iPhnDjbxo,Music,17.0
5,Luísa Sonza canta 'Chico' e 'La Muerte' no Prê...,87565,4803,294,2023-11-08 04:21:38+00:00,https://www.youtube.com/watch?v=iNKj4BVtiAw,Music,15.0
6,BASTIDORES | VASCO 1 X 0 BOTAFOGO | VASCOTV,128003,19034,645,2023-11-08 00:30:10+00:00,https://www.youtube.com/watch?v=C03HDhgCeqw,Sports,18.0
7,EPISÓDIO 08 - 2/5: Semifinal EMOCIONANTE | TEM...,320200,9793,513,2023-11-08 08:01:00+00:00,https://www.youtube.com/watch?v=pLoAhwLjAB0,Entertainment,11.0
8,ZANELLA E EMANUELLY… O QUE É ISSO?,239766,16513,671,2023-11-07 21:30:02+00:00,https://www.youtube.com/watch?v=Gs9DKhhSjew,Entertainment,21.0
9,Gabriela Rocha - Me Atraiu (Reimagined),123855,15675,1317,2023-11-07 20:00:24+00:00,https://www.youtube.com/watch?v=_UN2gwabRBI,Music,23.0


In [9]:
# converendo as colunas 'views', 'likes', 'comentarios' para int
df['views'] = df['views'].astype(int)
df['likes'] = df['likes'].astype(int)
df['comentarios'] = df['comentarios'].astype(int)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 8 columns):
 #   Column            Non-Null Count  Dtype              
---  ------            --------------  -----              
 0   video             20 non-null     object             
 1   views             20 non-null     int32              
 2   likes             20 non-null     int32              
 3   comentarios       20 non-null     int32              
 4   publicacao        20 non-null     datetime64[ns, UTC]
 5   link_video        20 non-null     object             
 6   categoria         20 non-null     object             
 7   horas-publicacao  20 non-null     float64            
dtypes: datetime64[ns, UTC](1), float64(1), int32(3), object(3)
memory usage: 1.1+ KB


In [10]:
# calculando a quantidade de views por hora
df["views/hour"] = (df["views"]/df["horas-publicacao"]).round(2)
# excluindo os dados duplicados
df = df.drop_duplicates(subset='video').reset_index(drop=True)
df

Unnamed: 0,video,views,likes,comentarios,publicacao,link_video,categoria,horas-publicacao,views/hour
0,Ep.1 Corrida das Blogueiras 5: COMBO DE VERÃO ...,346650,51131,4474,2023-11-07 22:59:09+00:00,https://www.youtube.com/watch?v=-_hCTBtn4aY,Entertainment,20.0,17332.5
1,LIGA MFL: GRANDE FINAL 🔥| NOISE - CRIAS - FAZ ...,578355,49021,59,2023-11-08 02:43:25+00:00,https://www.youtube.com/watch?v=Ot7t2Bsx1jY,Gaming,16.0,36147.19
2,"Ludmilla, Pabllo Vittar, Marina Sena, Duda Bea...",84384,4321,466,2023-11-08 02:52:15+00:00,https://www.youtube.com/watch?v=176sdftBgYA,Music,16.0,5274.0
3,수진 (SOOJIN) '아가씨' Official MV,2258557,344675,26465,2023-11-08 09:00:27+00:00,https://www.youtube.com/watch?v=1p2cafgpD1M,Music,10.0,225855.7
4,"Pedro Sampaio, Joelma, NXZero, Tati Quebra-Bar...",78158,4995,876,2023-11-08 01:39:56+00:00,https://www.youtube.com/watch?v=c9iPhnDjbxo,Music,17.0,4597.53
5,Luísa Sonza canta 'Chico' e 'La Muerte' no Prê...,87565,4803,294,2023-11-08 04:21:38+00:00,https://www.youtube.com/watch?v=iNKj4BVtiAw,Music,15.0,5837.67
6,BASTIDORES | VASCO 1 X 0 BOTAFOGO | VASCOTV,128003,19034,645,2023-11-08 00:30:10+00:00,https://www.youtube.com/watch?v=C03HDhgCeqw,Sports,18.0,7111.28
7,EPISÓDIO 08 - 2/5: Semifinal EMOCIONANTE | TEM...,320200,9793,513,2023-11-08 08:01:00+00:00,https://www.youtube.com/watch?v=pLoAhwLjAB0,Entertainment,11.0,29109.09
8,ZANELLA E EMANUELLY… O QUE É ISSO?,239766,16513,671,2023-11-07 21:30:02+00:00,https://www.youtube.com/watch?v=Gs9DKhhSjew,Entertainment,21.0,11417.43
9,Gabriela Rocha - Me Atraiu (Reimagined),123855,15675,1317,2023-11-07 20:00:24+00:00,https://www.youtube.com/watch?v=_UN2gwabRBI,Music,23.0,5385.0


In [35]:
# Data de hoje 
import datetime

date_today = datetime.date.today()
date_today


datetime.date(2023, 11, 8)

In [36]:
# salvando o arquivo em um csv

df.to_csv(f'dados/top_10_youtube_{date_today}.csv', sep=';',index=False)