In [2]:
import pandas as pd 
from datetime import datetime, timezone

In [4]:
# `datetime` is the class imported above (not the module), so it has no usable
# `date.today()`; take today's local date from a datetime instance instead.
date = datetime.today().date()
date

datetime.date(2025, 11, 4)

In [7]:
# Current UTC time rendered as a compact YYYYMMDD_HHMMSS string.
timestamp = f"{datetime.now(timezone.utc):%Y%m%d_%H%M%S}"
timestamp

'20251106_001543'

In [None]:
import requests
import pandas as pd
from datetime import datetime, timezone
import os

# Stream languages to query; one API request is made per entry.
LANGUAGES = ["pt", "en", "es"]
# Page size sent as the `first` query parameter.
LIMIT = 100
# Twitch API credentials read from the environment (None when unset).
CLIENT_ID = os.environ.get("CLIENT_ID")
ACCESS_TOKEN = os.environ.get("ACCESS_TOKEN")

def get_streams(language, timeout=10):
    """Fetch one page of live streams for a language from the Twitch Helix API.

    Sends a single GET to ``/helix/streams`` asking for up to ``LIMIT``
    streams in ``language``, authenticated with ``CLIENT_ID`` and
    ``ACCESS_TOKEN``. No pagination is performed.

    Args:
        language: Value sent as the ``language`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The list stored under the ``"data"`` key of the JSON response, or an
        empty list when the request fails, the status code is not 200, or the
        key is absent.
    """
    url = "https://api.twitch.tv/helix/streams"
    # Let requests build the query string so the values are URL-encoded.
    params = {"first": LIMIT, "language": language}
    headers = {
        "Client-ID": CLIENT_ID,
        "Authorization": f"Bearer {ACCESS_TOKEN}"
    }
    try:
        # Without a timeout a stalled connection would block the notebook indefinitely.
        response = requests.get(url, headers=headers, params=params, timeout=timeout)
    except requests.RequestException as exc:
        # Keep the same best-effort contract as for HTTP errors: report and return nothing.
        print(f"Erro de rede: {exc}")
        return []
    if response.status_code != 200:
        print(f"Erro {response.status_code}: {response.text}")
        return []
    return response.json().get("data", [])

def collect_all_streams():
    """Collect the live streams for every language in ``LANGUAGES``.

    Calls ``get_streams`` once per language and flattens the results into a
    single table. Durations are measured against one UTC reference time taken
    at the start of the run, so all rows share the same "now".

    Returns:
        pandas.DataFrame with one row per stream and the columns listed in
        ``columns`` below. The columns are present even when no stream was
        collected, so downstream code sees a stable schema.
    """
    columns = [
        "streamer", "categoria", "idioma", "viewers",
        "inicio", "duracao_min", "hora", "dia_semana",
    ]
    # One reference instant for the whole run instead of one clock read per row.
    now = datetime.now(timezone.utc)
    all_streams = []
    for lang in LANGUAGES:
        for s in get_streams(lang):
            # Rewrite the trailing "Z" as an explicit offset; datetime.fromisoformat
            # only accepts the "Z" suffix natively from Python 3.11 onwards.
            started = datetime.fromisoformat(s["started_at"].replace("Z", "+00:00"))
            duration_min = (now - started).total_seconds() / 60
            all_streams.append({
                "streamer": s["user_name"],
                "categoria": s["game_name"],
                "idioma": s["language"],
                "viewers": s["viewer_count"],
                "inicio": s["started_at"],
                "duracao_min": round(duration_min, 1),
                # Hour and weekday are taken from the parsed start time as-is
                # (i.e. in the offset carried by `started_at`).
                "hora": started.hour,
                "dia_semana": started.strftime("%A")
            })
    print(f"Total de streams coletadas: {len(all_streams)}")
    return pd.DataFrame(all_streams, columns=columns)

In [3]:
# Run the collection and preview the first rows of the result.
df = collect_all_streams()
df.head()

Total de streams coletadas: 294


Unnamed: 0,streamer,categoria,idioma,viewers,inicio,duracao_min,hora,dia_semana
0,JonVlogs,Just Chatting,pt,7663,2025-11-08T11:47:27Z,462.7,11,Saturday
1,Baiano,League of Legends,pt,5405,2025-11-08T14:37:10Z,293.0,14,Saturday
2,gabepeixe,Z1: Battle Royale,pt,3075,2025-11-08T15:07:13Z,263.0,15,Saturday
3,TcK10,Battlefield REDSEC,pt,2830,2025-11-08T14:45:40Z,284.5,14,Saturday
4,RazaH,Tom Clancy's Rainbow Six Siege X,pt,2463,2025-11-08T12:01:58Z,448.2,12,Saturday


In [4]:
import boto3
from botocore.exceptions import NoCredentialsError
import os
from datetime import datetime, timezone
import io
import pandas as pd

def s3_upload_parquet(df, prefix="silver"):
    """Serialize a DataFrame to Parquet in memory and upload it to S3.

    The object key is
    ``{prefix}/year=YYYY/month=MM/day=DD/twitch_streams_YYYYMMDD_HHMMSS.parquet``,
    built from the current UTC time. The bucket name is read from
    ``S3_BUCKET_NAME``; client credentials and region are read from
    ``AWS_ACCESS_KEY_ID``, ``AWS_SECRET_ACCESS_KEY`` and ``AWS_REGION``.

    Args:
        df: DataFrame to upload.
        prefix: Top-level key prefix placed before the date partition.
            The default is ``"silver"``, which is where uploads were always
            written; an explicitly passed value is now honoured instead of
            being overwritten.

    Returns:
        None. Success and failure are reported with ``print``; upload errors
        are caught rather than raised.
    """
    bucket = os.getenv("S3_BUCKET_NAME")
    if not bucket:
        # Fail early with a clear message instead of a parameter error from boto3.
        print("Variável de ambiente S3_BUCKET_NAME não definida.")
        return

    now = datetime.now(timezone.utc)
    # rstrip avoids a double slash when the caller passes e.g. "raw/".
    partition = f"{prefix.rstrip('/')}/year={now.year}/month={now.month:02d}/day={now.day:02d}"
    file_name = f"twitch_streams_{now.strftime('%Y%m%d_%H%M%S')}.parquet"
    object_name = f"{partition}/{file_name}"

    s3_client = boto3.client(
        "s3",
        aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
        aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
        region_name=os.getenv("AWS_REGION")
    )

    try:
        buffer = io.BytesIO()
        df.to_parquet(buffer, index=False, engine="pyarrow", compression="snappy")

        # getvalue() returns the whole buffer regardless of the stream position.
        s3_client.put_object(
            Body=buffer.getvalue(),
            Bucket=bucket,
            Key=object_name,
            ContentType="application/parquet-streams"
        )
        print(f"Arquivo {object_name} enviado para o bucket {bucket}.")
    except NoCredentialsError:
        print("Credenciais AWS não encontradas.")
    except Exception as e:
        # Deliberate best-effort: report any other failure without raising.
        print(f"Erro ao enviar para o S3: {e}")


In [5]:
# Scratch check: serialize df to an in-memory Parquet buffer with the same
# options s3_upload_parquet passes to to_parquet, without uploading anything.
buffer = io.BytesIO()
df.to_parquet(buffer, index=False, engine="pyarrow", compression="snappy")

In [7]:
# Show column dtypes, non-null counts and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 294 entries, 0 to 293
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   streamer     294 non-null    object 
 1   categoria    294 non-null    object 
 2   idioma       294 non-null    object 
 3   viewers      294 non-null    int64  
 4   inicio       294 non-null    object 
 5   duracao_min  294 non-null    float64
 6   hora         294 non-null    int64  
 7   dia_semana   294 non-null    object 
dtypes: float64(1), int64(2), object(5)
memory usage: 18.5+ KB


In [3]:
# Load a saved snapshot from the working directory; this rebinds `df`.
# NOTE(review): the displayed file has columns (user_id, game_id, is_mature, tags)
# that collect_all_streams in this notebook does not produce — presumably it was
# written by a different version of the collector; confirm.
df = pd.read_parquet("twitch_streams_20251110_011618.parquet")
df

Unnamed: 0,streamer,categoria,idioma,viewers,inicio,duracao_min,hora,dia_semana,user_id,game_id,is_mature,tags
0,JonVlogs,Just Chatting,pt,15634,2025-11-09T21:29:54Z,226.4,21,Sunday,103989988,509658,False,"[Português, jonvlogs, bdj, IRL]"
1,HudsonAmorim1,Virtual Casino,pt,4126,2025-11-09T23:09:31Z,126.8,23,Sunday,450476144,29452,False,[Português]
2,Baiano,League of Legends,pt,1989,2025-11-09T15:38:03Z,578.2,15,Sunday,140772558,21779,True,"[Português, DropsAtivados]"
3,Rubini,Tibia,pt,1852,2025-11-09T22:07:59Z,188.3,22,Sunday,113206860,19619,True,"[Português, Medivia, MMORPG, Tibia, Rubinot]"
4,Olkabone,Just Chatting,pt,1826,2025-11-09T17:28:16Z,468.0,17,Sunday,465218900,509658,False,"[Português, react, game, Roleplay, DropsAtivados]"
...,...,...,...,...,...,...,...,...,...,...,...,...
288,EXTREMEBREAKER1,Halo Infinite,es,147,2025-11-09T22:52:09Z,144.2,22,Sunday,35978160,506416,False,"[Español, English, DropsActivados]"
289,stepz2,Fortnite,es,145,2025-11-09T21:45:53Z,210.4,21,Sunday,516913669,33214,False,"[Español, Venezuela, DropsActivados]"
290,Feeckz,Albion Online,es,132,2025-11-09T20:55:28Z,260.8,20,Sunday,240510832,417528,False,"[Español, MMORPG, giveaway, Europa, AlbionOnli..."
291,Devas70,Dead by Daylight,es,97,2025-11-09T22:12:52Z,183.4,22,Sunday,1050668015,491487,False,"[devas008, devas, deadbydaylight, terror, top5..."
