In [2]:
from pathlib import Path
import os

def set_project_root():
    """Step out of a ``notebooks`` folder so the working directory is its parent.

    When the current working directory is named ``notebooks`` the process
    moves one level up; otherwise the working directory is left alone.
    The resulting working directory is printed in both cases.
    """
    started_in_notebooks = Path.cwd().name == "notebooks"
    if started_in_notebooks:
        os.chdir(os.pardir)
    print("Root:", Path.cwd())

# Run once up front: later cells build paths from the current working directory.
set_project_root()

Root: c:\Users\cubea\OneDrive\Escritorio\project_esports_occidente


In [3]:
from pathlib import Path
import pandas as pd

# Data directories, anchored at the current working directory.
BASE = Path.cwd()
RAW = BASE.joinpath("data", "raw")
PROC = BASE.joinpath("data", "processed")

# Source file locations (always the same)
T_TOP = RAW.joinpath("top_twitch", "Twitch_game_data.csv")
T_GLOB = RAW.joinpath("top_twitch", "Twitch_global_data.csv")
E_HIST = RAW.joinpath("esports_earnings", "HistoricalEsportData.csv")
E_GEN = RAW.joinpath("esports_earnings", "GeneralEsportData.csv")

def _bom_encoding(path):
    """Return ``"utf-16"`` if ``path`` starts with a UTF-16 byte-order mark, else ``None``."""
    try:
        with open(path, "rb") as handle:
            head = handle.read(2)
    except (OSError, TypeError):
        # Not a readable local file (missing, URL, buffer, ...): nothing to sniff.
        return None
    return "utf-16" if head in (b"\xff\xfe", b"\xfe\xff") else None


def read_csv_smart(path):
    """Read a CSV whose encoding and delimiter are not known in advance.

    Encodings are tried in the order default (UTF-8), cp1252, latin1, utf-16;
    a file that starts with a UTF-16 byte-order mark is tried as utf-16 first,
    because latin1 accepts any byte sequence and would otherwise shadow it.
    For each encoding both ``,`` and ``;`` are tried as the delimiter.

    Args:
        path: Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        The first parse that yields more than one column. If every successful
        parse for an encoding has a single column, the ``,`` parse is returned.

    Raises:
        RuntimeError: If no encoding/delimiter combination could be parsed.
            The last underlying error is attached as ``__cause__``.
    """
    encodings = [None, "cp1252", "latin1", "utf-16"]
    sniffed = _bom_encoding(path)
    if sniffed is not None:
        encodings.remove(sniffed)
        encodings.insert(0, sniffed)

    last_error = None
    for enc in encodings:
        single_column = None
        for sep in (",", ";"):
            try:
                frame = pd.read_csv(path, encoding=enc, sep=sep)
            except Exception as exc:
                # Best effort: remember why this attempt failed and keep trying.
                last_error = exc
                continue
            if frame.shape[1] > 1:
                return frame
            # A one-column result usually means the delimiter was wrong;
            # keep it only as a fallback and try the other delimiter.
            if single_column is None:
                single_column = frame
        if single_column is not None:
            return single_column
    raise RuntimeError(f"No pude leer: {path}") from last_error

# Example usage:
# twitch_df = read_csv_smart(T_TOP)
# earnings_hist_df = read_csv_smart(E_HIST)


In [4]:
# Stop right away if any of the raw input CSVs is missing.
required_sources = (T_TOP, T_GLOB, E_HIST, E_GEN)
for p in required_sources:
    assert p.exists(), f"Falta: {p}"
print("✅ CSV origen encontrados")


✅ CSV origen encontrados


In [6]:
from pathlib import Path
import pandas as pd

# Processed-data directory, relative to the current working directory.
PROC = Path("data/processed")
# Candidate inputs in priority order: the first one that exists is loaded.
CANDIDATES = [
    PROC / "Esports_Occidente_Master.csv",
    PROC / "review" / "Esports_Occidente_2016_2024_Normalizado.csv",
    PROC / "review" / "Esports_Occidente_2016_2024.csv",
    PROC / "review" / "esports_occidente_final.csv",
]

df = None
for p in CANDIDATES:
    if p.exists():
        df = pd.read_csv(p, encoding="utf-8")
        print(f"✅ Cargado: {p}")
        break

if df is None:
    # Nothing matched: list every CSV under PROC to help locate the right file.
    print("⚠️ No encontré ninguno de los candidatos. CSVs disponibles:")
    for q in PROC.glob("**/*.csv"):
        print(" -", q)
else:
    # Use the abbreviated column names expected by the analysis.
    df = df.rename(columns={
        "PremiosUSD_Millones": "PremiosUSD_M",
        "PremiosTotalesUSD_Millones": "PremiosTotalesUSD_M"
    })
    # DataFrame.info() prints its summary and returns None, so it must not be
    # passed to display() (that rendered a stray "None" after the table).
    df.info()
    display(df.head())


✅ Cargado: data\processed\review\Esports_Occidente_2016_2024.csv
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4700 entries, 0 to 4699
Data columns (total 11 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   JuegoNorm          4699 non-null   object 
 1   Año                4700 non-null   int64  
 2   MediaViewers       4700 non-null   float64
 3   PicoViewers        4700 non-null   int64  
 4   HorasVistas        4700 non-null   int64  
 5   PremiosUSD         521 non-null    float64
 6   Torneos            521 non-null    float64
 7   Género             769 non-null    object 
 8   PremiosTotalesUSD  769 non-null    float64
 9   JugadoresTotales   769 non-null    float64
 10  TorneosTotales     769 non-null    float64
dtypes: float64(6), int64(3), object(2)
memory usage: 404.0+ KB


Unnamed: 0,JuegoNorm,Año,MediaViewers,PicoViewers,HorasVistas,PremiosUSD,Torneos,Género,PremiosTotalesUSD,JugadoresTotales,TorneosTotales
0,60 seconds!,2016,517.666667,56904,1151013,,,,,,
1,7 days to die,2016,750.75,36862,6639960,,,,,,
2,abz<u+00db>,2016,455.0,28205,338342,,,,,,
3,agar.io,2016,295.6,6655,1075214,,,,,,
4,age of empires,2016,334.0,107455,248884,64047.8,22.0,Strategy,736284.75,624.0,341.0


None

In [7]:
from pathlib import Path
import pandas as pd

PROC = Path("data/processed")
review = PROC / "review"

# `df` was loaded in the previous cell.
cols_map = {
    # Accept column-name variants and map them onto the standard names.
    "JuegoNorm":"JuegoNorm", "Juego":"Juego",
    "Año":"Año",
    "Género":"Género", "Genero":"Género",
    "MediaViewers":"MediaViewers", "MediaViewers_K":"MediaViewers_K",
    "PicoViewers":"PicoViewers", "PicoViewers_K":"PicoViewers_K",
    "HorasVistas":"HorasVistas", "HorasVistas_Millones":"HorasVistas_Millones",
    "PremiosUSD":"PremiosUSD", "PremiosUSD_M":"PremiosUSD_M",
    "PremiosTotalesUSD":"PremiosTotalesUSD","PremiosTotalesUSD_M":"PremiosTotalesUSD_M",
    "Torneos":"Torneos","JugadoresTotales":"JugadoresTotales","TorneosTotales":"TorneosTotales",
}

df = df.rename(columns=cols_map)

# Scaled column -> (unscaled source column, divisor, decimals kept).
scaled_specs = {
    "MediaViewers_K": ("MediaViewers", 1_000, 1),
    "PicoViewers_K": ("PicoViewers", 1_000, 1),
    "HorasVistas_Millones": ("HorasVistas", 1_000_000, 1),
    "PremiosUSD_M": ("PremiosUSD", 1_000_000, 2),
    "PremiosTotalesUSD_M": ("PremiosTotalesUSD", 1_000_000, 2),
}

# Derive the K/M columns from unscaled data; an existing scaled column is never overwritten.
for target, (source, divisor, decimals) in scaled_specs.items():
    if target not in df and source in df:
        df[target] = (pd.to_numeric(df[source], errors="coerce") / divisor).round(decimals)

# Types and cleanup.
df["Año"] = pd.to_numeric(df["Año"], errors="coerce").astype("Int64")
for c in scaled_specs:
    if c in df:
        df[c] = pd.to_numeric(df[c], errors="coerce").round(2)

# df.get("Género", "Desconocido") returns a plain str when the column is
# missing, and str has no .fillna(); handle the missing column explicitly.
if "Género" in df:
    df["Género"] = df["Género"].fillna("Desconocido")
else:
    df["Género"] = "Desconocido"

# Drop duplicates on the (game, year) key.
df = df.drop_duplicates(subset=["JuegoNorm","Año"]).sort_values(["Año","JuegoNorm"]).reset_index(drop=True)

# Final column order (only the columns that exist are kept).
wanted = ["JuegoNorm","Año","Género","MediaViewers_K","PicoViewers_K","HorasVistas_Millones",
          "PremiosUSD_M","Torneos","PremiosTotalesUSD_M","JugadoresTotales","TorneosTotales"]
df = df[[c for c in wanted if c in df.columns]]

# Save the master file.
PROC.mkdir(parents=True, exist_ok=True)
master_path = PROC / "Esports_Occidente_Master.csv"
df.to_csv(master_path, index=False)
print(f"✅ Master guardado: {master_path}")
display(df.head())


✅ Master guardado: data\processed\Esports_Occidente_Master.csv


Unnamed: 0,JuegoNorm,Año,Género,MediaViewers_K,PicoViewers_K,HorasVistas_Millones,PremiosUSD_M,Torneos,PremiosTotalesUSD_M,JugadoresTotales,TorneosTotales
0,60 seconds!,2016,Desconocido,0.5,56.9,1.2,,,,,
1,7 days to die,2016,Desconocido,0.8,36.9,6.6,,,,,
2,abz<u+00db>,2016,Desconocido,0.5,28.2,0.3,,,,,
3,agar.io,2016,Desconocido,0.3,6.7,1.1,,,,,
4,age of empires,2016,Strategy,0.3,107.5,0.2,0.06,22.0,0.74,624.0,341.0


In [8]:
def view(df, year=None, game=None, genre=None, cols=None):
    """Return a filtered copy of ``df``.

    Args:
        df: Frame with ``Año``, ``JuegoNorm`` and ``Género`` columns.
        year: Keep rows whose ``Año`` equals this value.
        game: Keep rows whose ``JuegoNorm`` equals this value exactly.
        genre: Keep rows whose ``Género`` contains this pattern,
            case-insensitively; missing genres never match.
        cols: Optional column selection applied after filtering.

    Returns:
        The rows that pass every filter that was given (filters left as
        ``None`` are skipped), restricted to ``cols`` when it is truthy.
    """
    selected = df.copy()
    # (criterion, mask builder) pairs, applied in order on the remaining rows.
    filters = (
        (year, lambda frame: frame["Año"].eq(year)),
        (game, lambda frame: frame["JuegoNorm"].eq(game)),
        (genre, lambda frame: frame["Género"].str.contains(genre, case=False, na=False)),
    )
    for criterion, build_mask in filters:
        if criterion is not None:
            selected = selected[build_mask(selected)]
    if cols:
        return selected[cols]
    return selected

# Examples: filter by year, by exact game name, and by genre substring.
v2017 = view(df, year=2017)
v_lol = view(df, game="lol")
v_shoot = view(df, genre="shoot")
for preview in (v2017, v_lol, v_shoot):
    display(preview.head())


Unnamed: 0,JuegoNorm,Año,Género,MediaViewers_K,PicoViewers_K,HorasVistas_Millones,PremiosUSD_M,Torneos,PremiosTotalesUSD_M,JugadoresTotales,TorneosTotales
619,.hack//g.u. last recode,2017,Desconocido,0.2,1.2,0.1,,,,,
620,60 seconds!,2017,Desconocido,0.3,31.3,0.6,,,,,
621,7 days to die,2017,Desconocido,0.9,39.5,8.3,,,,,
622,<u+014c>kami,2017,Desconocido,0.2,29.4,0.2,,,,,
623,a hat in time,2017,Desconocido,0.5,28.6,0.7,,,,,


Unnamed: 0,JuegoNorm,Año,Género,MediaViewers_K,PicoViewers_K,HorasVistas_Millones,PremiosUSD_M,Torneos,PremiosTotalesUSD_M,JugadoresTotales,TorneosTotales
273,lol,2016,Multiplayer Online Battle Arena,118.1,952.3,1035.5,10.52,182.0,113.6,9588.0,3000.0
895,lol,2017,Multiplayer Online Battle Arena,117.1,1084.3,1022.4,12.26,193.0,113.6,9588.0,3000.0
1494,lol,2018,Multiplayer Online Battle Arena,112.6,1147.8,985.4,14.69,195.0,113.6,9588.0,3000.0
2043,lol,2019,Multiplayer Online Battle Arena,127.9,1782.8,1118.4,10.41,210.0,113.6,9588.0,3000.0
2551,lol,2020,Multiplayer Online Battle Arena,177.2,2020.8,1553.5,8.77,123.0,113.6,9588.0,3000.0


Unnamed: 0,JuegoNorm,Año,Género,MediaViewers_K,PicoViewers_K,HorasVistas_Millones,PremiosUSD_M,Torneos,PremiosTotalesUSD_M,JugadoresTotales,TorneosTotales
29,battlefield 1,2016,First-Person Shooter,4.6,80.9,20.2,,,0.0,0.0,0.0
30,battlefield 4,2016,First-Person Shooter,0.9,482.9,6.4,0.01,3.0,0.25,204.0,145.0
31,battlefield hardline,2016,First-Person Shooter,0.1,47.1,0.1,,,0.01,20.0,1.0
57,call of duty 4: modern warfare,2016,First-Person Shooter,0.4,29.7,0.8,,,0.43,431.0,87.0
58,call of duty: black ops,2016,First-Person Shooter,0.3,8.7,0.5,,,0.38,118.0,23.0


In [9]:
# Write the review copy with DataFrame.to_csv, the same way the master file is
# saved. Path.write_text would encode with the platform default encoding and
# apply text-mode newline translation to the already-formatted CSV string, so
# the copy could differ from the master file.
review.mkdir(parents=True, exist_ok=True)  # the folder may not exist yet
df.to_csv(review / "Esports_Occidente_Master_Review.csv", index=False)
print("📁 Copia de revisión creada en /processed/review/")


📁 Copia de revisión creada en /processed/review/
