<a href="https://colab.research.google.com/github/WildAlex37/g_search_lite/blob/main/G_search_lite_cleaning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Récupération de la Data et cleaning


In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt
import os
pd.options.display.max_columns = None

In [None]:
import os
import json
import time
import requests
import pandas as pd
from datetime import datetime, timedelta

# IGDB API credentials
CLIENT_ID = "v4dlufpsiskkalr4ht57stnpxeh3lo"
CLIENT_SECRET = "8a1okhz512zn4lenv42jzt30c24gd9"

# IGDB API endpoints
AUTH_URL = 'https://id.twitch.tv/oauth2/token'
API_ENDPOINT = 'https://api.igdb.com/v4/games'

# Define the platform IDs for Xbox Series, PS5, and Switch
PLATFORM_IDS = {
    'Xbox Series': 169,  # Xbox Series X/S
    'PS5': 167,          # PlayStation 5
    'Switch': 130        # Nintendo Switch
}

def get_access_token():
    auth_params = {
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'grant_type': 'client_credentials'
    }
    auth_response = requests.post(AUTH_URL, params=auth_params)
    return auth_response.json()['access_token']

def fetch_games(access_token, time_back: int=365, offset=0):
    headers = {
        'Client-ID': CLIENT_ID,
        'Authorization': f'Bearer {access_token}',
    }

    # Calculate the timestamp for a certain period
    limit_time = int((datetime.now() - timedelta(days=time_back)).timestamp())
    current_time = int(datetime.now().timestamp())

    # IGDB API query with rating, rating_count, and platform filter
    body = f"""
    fields name, first_release_date, release_dates.date, genres.name, platforms.name, summary, cover.url, rating, rating_count;
    where first_release_date >= {limit_time}
    & release_dates.date <= {current_time}
#    & platforms = ({PLATFORM_IDS['Xbox Series']}, {PLATFORM_IDS['PS5']}, {PLATFORM_IDS['Switch']});
    sort first_release_date desc;
    limit 500;
    offset {offset};
    """

    response = requests.post(API_ENDPOINT, headers=headers, data=body)
    return response.json()

def get_games(time_back: int):
    access_token = get_access_token()
    all_games = []
    offset = 0

    while True:
        games = fetch_games(access_token, time_back, offset)
        if not games:
            break

        all_games.extend(games)
        print(f"Fetched {len(games)} games. Total: {len(all_games)}")

        if len(games) < 500:
            break

        offset += 500
        time.sleep(0.3)  # Respect rate limits

    # Save all games to a JSON file
    with open('games_entry.json', 'w') as f:
        json.dump(all_games, f, indent=4)

    print(f"Scraped a total of {len(all_games)} games from the last {time_back} days.")
    return all_games

def flatten_data(games):
    flattened_games = []
    for game in games:
        flattened_game = {
            'name': game.get('name'),
            'first_release_date': pd.to_datetime(game.get('first_release_date'), unit='s') if game.get('first_release_date') else None,
            'genres': ', '.join([genre['name'] for genre in game.get('genres', [])]),
            'platforms': ', '.join([platform['name'] for platform in game.get('platforms', [])]),
            'summary': game.get('summary'),
            'rating': game.get('rating'),
            'rating_count': game.get('rating_count'),
            'cover': game.get('cover', {}).get('url') if isinstance(game.get('cover', {}), dict) else None
        }
        flattened_games.append(flattened_game)
    return flattened_games

if __name__ == "__main__":
    games = get_games(10000)
    flattened_games = flatten_data(games)

    # Create a DataFrame from the flattened data
    df = pd.DataFrame(flattened_games)

    # Filter the DataFrame for the relevant platforms
    df = df[df['platforms'].str.contains('PlayStation 5|Xbox Series|Nintendo Switch', na=False)]

    # Export to Parquet
    df.to_parquet('df_filtered.parquet')

    # Display the filtered DataFrame
    print(df.head())


Fetched 500 games. Total: 500
Fetched 500 games. Total: 1000
Fetched 500 games. Total: 1500
Fetched 500 games. Total: 2000
Fetched 500 games. Total: 2500
Fetched 500 games. Total: 3000
Fetched 500 games. Total: 3500
Fetched 500 games. Total: 4000
Fetched 500 games. Total: 4500
Fetched 500 games. Total: 5000
Fetched 500 games. Total: 5500
Fetched 500 games. Total: 6000
Fetched 500 games. Total: 6500
Fetched 500 games. Total: 7000
Fetched 500 games. Total: 7500
Fetched 500 games. Total: 8000
Fetched 500 games. Total: 8500
Fetched 500 games. Total: 9000
Fetched 500 games. Total: 9500
Fetched 500 games. Total: 10000
Fetched 500 games. Total: 10500
Fetched 500 games. Total: 11000
Fetched 500 games. Total: 11500
Fetched 500 games. Total: 12000
Fetched 500 games. Total: 12500
Fetched 500 games. Total: 13000
Fetched 500 games. Total: 13500
Fetched 500 games. Total: 14000
Fetched 500 games. Total: 14500
Fetched 500 games. Total: 15000
Fetched 500 games. Total: 15500
Fetched 500 games. Total: 16

In [None]:
df.sample(5)

Unnamed: 0,name,first_release_date,genres,platforms,summary,rating,rating_count,cover
14306,Rogue Explorer,2021-03-12,"Platform, Role-playing (RPG), Hack and slash/B...","PC (Microsoft Windows), PlayStation 4, Xbox On...",Pixel Art Hack & Slash Roguelike Action! Do yo...,,,//images.igdb.com/igdb/image/upload/t_thumb/co...
7241,MLB The Show 23,2023-03-28,"Simulator, Sport","PlayStation 4, Xbox One, Nintendo Switch, Play...",Shock The Game. Own The Show.,85.978744,6.0,//images.igdb.com/igdb/image/upload/t_thumb/co...
15949,Package Inc.,2020-08-14,"Puzzle, Simulator, Strategy","iOS, Nintendo Switch",Package Inc. is an inspiring delivery simulato...,,,//images.igdb.com/igdb/image/upload/t_thumb/co...
22070,Zaccaria Pinball: Aerobatics Table,2016-06-16,"Pinball, Indie","Linux, PC (Microsoft Windows), Mac, Nintendo S...",Aerobatics Table is a flying themed pinball ta...,,,//images.igdb.com/igdb/image/upload/t_thumb/co...
792,That Time I Got Reincarnated as a Slime: Iseka...,2024-08-08,Role-playing (RPG),"Xbox One, Nintendo Switch, Xbox Series X|S",The Digital Deluxe Edition includes That Time ...,,,//images.igdb.com/igdb/image/upload/t_thumb/co...


In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23458 entries, 0 to 23457
Data columns (total 8 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   name                23458 non-null  object        
 1   first_release_date  23458 non-null  datetime64[ns]
 2   genres              23458 non-null  object        
 3   platforms           23458 non-null  object        
 4   summary             21395 non-null  object        
 5   rating              4685 non-null   float64       
 6   rating_count        4685 non-null   float64       
 7   cover               21357 non-null  object        
dtypes: datetime64[ns](1), float64(2), object(5)
memory usage: 1.4+ MB


## En TRAVAUX ##

In [None]:
from google.colab import files
df.to_parquet('df_triple.parquet')
files.download('df_triple.parquet')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
df["first_release_date"] = pd.to_datetime(df["first_release_date"])

current_year = pd.Timestamp.now().year
df['year'] = df['first_release_date'].dt.year
df = df[df['year'] <= current_year]
df['decennie'] = (df['year'] // 10) * 10

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["first_release_date"] = pd.to_datetime(df["first_release_date"])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['year'] = df['first_release_date'].dt.year


In [None]:
df.head(10)

Unnamed: 0,name,first_release_date,genres,platforms,summary,rating,rating_count,cover,year,decennie
2,Ground Zero Hero,2024-12-31,"Adventure, Indie","PC (Microsoft Windows), Nintendo Switch, Xbox ...",Wacky action-roguelite survival in a post-apoc...,,,//images.igdb.com/igdb/image/upload/t_thumb/co...,2024,2020
3,Re:Pik 3 Deluxe,2024-12-25,"Strategy, Adventure",Nintendo Switch,,,,//images.igdb.com/igdb/image/upload/t_thumb/co...,2024,2020
4,Call of Duty: Black Ops 6,2024-10-25,Shooter,"PC (Microsoft Windows), PlayStation 4, Xbox On...","Developed by Treyarch and Raven, Black Ops 6 i...",,,//images.igdb.com/igdb/image/upload/t_thumb/co...,2024,2020
5,Damikira,2024-10-02,"Shooter, Arcade",Nintendo Switch,Use your grappling hook and blast your way thr...,,,//images.igdb.com/igdb/image/upload/t_thumb/co...,2024,2020
6,Kill Knight,2024-10-02,"Shooter, Indie, Arcade","PC (Microsoft Windows), Nintendo Switch, PlayS...",KILL KNIGHT is an ultra-responsive arcade-insp...,,,//images.igdb.com/igdb/image/upload/t_thumb/co...,2024,2020
7,Dave the Diver: Mxmtoon,2024-10-01,,"PC (Microsoft Windows), Mac, PlayStation 4, Ni...",,,,,2024,2020
8,Dave the Diver: Balatro,2024-10-01,,"PC (Microsoft Windows), Mac, PlayStation 4, Ni...",,,,,2024,2020
9,Dave the Diver: Potion Craft,2024-10-01,,"PC (Microsoft Windows), Mac, PlayStation 4, Ni...",,,,,2024,2020
10,Guilty Gear: Strive - Additional Character 14:...,2024-10-01,,"PC (Microsoft Windows), PlayStation 4, Xbox On...",Adds Queen Dizzy to the roster of playable cha...,,,,2024,2020
11,Sword Art Online: Fractured Daydream - Premium...,2024-10-01,,Nintendo Switch,,,,,2024,2020


In [None]:
df_clean = df.dropna(subset=['year','rating','genres'])
df_clean_genre = df_clean.dropna(subset=['genres'])

In [None]:
df_clean_genre.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4685 entries, 135 to 23456
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   name                4685 non-null   object        
 1   first_release_date  4685 non-null   datetime64[ns]
 2   genres              4685 non-null   object        
 3   platforms           4685 non-null   object        
 4   summary             4685 non-null   object        
 5   rating              4685 non-null   float64       
 6   rating_count        4685 non-null   float64       
 7   cover               4684 non-null   object        
 8   year                4685 non-null   int32         
 9   decennie            4685 non-null   int32         
dtypes: datetime64[ns](1), float64(2), int32(2), object(5)
memory usage: 366.0+ KB


In [None]:
# Séparer les genres en une liste en utilisant plusieurs séparateurs
df_clean['genres_split'] = df_clean['genres'].str.split(r',\s*|\s*,\s*')

# Supprimer les espaces en trop autour des genres
df_clean['genres_split'] = df_clean['genres_split'].apply(lambda x: [genre.strip() for genre in x])

# Créer une liste unique de genres
all_genres = set(genre for sublist in df_clean['genres_split'] for genre in sublist)

# Créer des colonnes pour chaque genre
for genre in all_genres:
    df_clean[genre] = df_clean['genres_split'].apply(lambda x: 1 if genre in x else 0)

# Supprimer la colonne temporaire 'genres_split'
df_clean = df_clean.drop(columns=['genres_split'])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_clean['genres_split'] = df_clean['genres'].str.split(r',\s*|\s*,\s*')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_clean['genres_split'] = df_clean['genres_split'].apply(lambda x: [genre.strip() for genre in x])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_clean[genre] = df_clean['genr

In [None]:
# Garder les lignes où au moins une des colonnes PlayStation 5, Xbox Series, ou Switch est égale à 1
df_clean_filtered = df_clean[(df_clean['PlayStation 5'] == 1) |
                             (df_clean['Xbox Series'] == 1) |
                             (df_clean['Nintendo Switch'] == 1)]
df_clean_filtered.head()

KeyError: 'Xbox Series'

In [None]:
df_clean.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4685 entries, 135 to 23456
Data columns (total 34 columns):
 #   Column                      Non-Null Count  Dtype         
---  ------                      --------------  -----         
 0   name                        4685 non-null   object        
 1   first_release_date          4685 non-null   datetime64[ns]
 2   genres                      4685 non-null   object        
 3   platforms                   4685 non-null   object        
 4   summary                     4685 non-null   object        
 5   rating                      4685 non-null   float64       
 6   rating_count                4685 non-null   float64       
 7   cover                       4684 non-null   object        
 8   year                        4685 non-null   int32         
 9   decennie                    4685 non-null   int32         
 10                              4685 non-null   int64         
 11  Tactical                    4685 non-null   int64         

In [None]:
df_clean.head()

Unnamed: 0,name,first_release_date,genres,platforms,summary,rating,rating_count,cover,year,decennie,Unnamed: 11,Tactical,MOBA,Adventure,Turn-based strategy (TBS),Visual Novel,Hack and slash/Beat 'em up,Quiz/Trivia,Music,Pinball,Sport,Point-and-click,Real Time Strategy (RTS),Fighting,Indie,Arcade,Racing,Shooter,Puzzle,Strategy,Card & Board Game,Role-playing (RPG),Platform,Simulator
135,Shadows of Doubt,2024-09-26,"Role-playing (RPG), Adventure, Indie","PC (Microsoft Windows), PlayStation 5, Xbox Se...",Shadows of Doubt is a detective stealth game s...,70.628078,5.0,//images.igdb.com/igdb/image/upload/t_thumb/co...,2024,2020,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0
201,Frostpunk 2,2024-09-20,"Simulator, Strategy, Indie","PC (Microsoft Windows), Mac, PlayStation 5, Xb...",Frostpunk 2 is the sequel to the highly acclai...,81.094314,8.0,//images.igdb.com/igdb/image/upload/t_thumb/co...,2024,2020,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1
235,Aura of Worlds,2024-09-19,"Platform, Role-playing (RPG), Strategy, Tactic...","Linux, PC (Microsoft Windows), Mac, Nintendo S...","""Aura of worlds is a creative tactics rogue-li...",70.0,0.0,//images.igdb.com/igdb/image/upload/t_thumb/co...,2024,2020,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0
256,The Plucky Squire,2024-09-17,"Platform, Puzzle, Adventure","PC (Microsoft Windows), Nintendo Switch, PlayS...",Storybook characters discover a three-dimensio...,70.080896,8.0,//images.igdb.com/igdb/image/upload/t_thumb/co...,2024,2020,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0
346,"Warhammer 40,000: Space Marine II",2024-09-09,"Shooter, Role-playing (RPG), Adventure","PC (Microsoft Windows), PlayStation 5, Xbox Se...",The galaxy is in peril. Entire worlds are fall...,79.727888,27.0,//images.igdb.com/igdb/image/upload/t_thumb/co...,2024,2020,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0


In [None]:
# Remplacer les valeurs nulles par une chaîne vide pour éviter les erreurs
df_clean['platforms'] = df_clean['platforms'].fillna('')

# Séparer les plateformes en une liste en utilisant plusieurs séparateurs
df_clean['platforms_split'] = df_clean['platforms'].str.split(r',\s*|\s*,\s*')

# Supprimer les espaces en trop autour des plateformes
df_clean['platforms_split'] = df_clean['platforms_split'].apply(lambda x: [platform.strip() for platform in x])

# Créer une liste unique de plateformes
all_platforms = set(platform for sublist in df_clean['platforms_split'] for platform in sublist)

# Créer des colonnes pour chaque plateforme
for platform in all_platforms:
    df_clean[platform] = df_clean['platforms_split'].apply(lambda x: 1 if platform in x else 0)

# Supprimer la colonne temporaire 'platforms_split'
df_clean = df_clean.drop(columns=['platforms_split'])


In [None]:
df_clean.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4685 entries, 135 to 23456
Data columns (total 95 columns):
 #   Column                                                                  Non-Null Count  Dtype         
---  ------                                                                  --------------  -----         
 0   name                                                                    4685 non-null   object        
 1   first_release_date                                                      4685 non-null   datetime64[ns]
 2   genres                                                                  4685 non-null   object        
 3   platforms                                                               4685 non-null   object        
 4   summary                                                                 4685 non-null   object        
 5   rating                                                                  4685 non-null   float64       
 6   rating_count              

In [None]:
df_clean.head(5)

Unnamed: 0,name,first_release_date,genres,platforms,summary,rating,rating_count,cover,year,decennie,Unnamed: 11,Tactical,MOBA,Adventure,Turn-based strategy (TBS),Visual Novel,Hack and slash/Beat 'em up,Quiz/Trivia,Music,Pinball,Sport,Point-and-click,Real Time Strategy (RTS),Fighting,Indie,Arcade,Racing,Shooter,Puzzle,Strategy,Card & Board Game,Role-playing (RPG),Platform,Simulator,"PC (Microsoft Windows), Android, PlayStation Portable, Nintendo Switch",PlayStation Vita,Linux,Dreamcast,New Nintendo 3DS,Gear VR,Xbox 360,PlayStation VR2,Windows Phone,Oculus Rift,Game Boy Advance,Neo Geo AES,Super Nintendo Entertainment System,OnLive Game System,Amazon Fire TV,Nintendo Switch,Nintendo 64,Oculus VR,Daydream,Sega Mega Drive/Genesis,PC (Microsoft Windows),Legacy Mobile Device,Android,visionOS,Wii,SteamVR,Xbox One,Nintendo Entertainment System,PlayStation Portable,Mac,Windows Mixed Reality,DVD Player,PlayStation 2,Xbox,Sega Saturn,Web browser,Xbox Series X|S,PlayStation,OOParts,Nintendo GameCube,BlackBerry OS,Wii U,DOS,Google Stadia,Nintendo DS,Leapster,Windows Mobile,PlayStation VR,N-Gage,PlayStation 5,iOS,Neo Geo MVS,Arduboy,PlayStation 3,Meta Quest 3,Meta Quest 2,Ouya,Nintendo 3DS,PlayStation 4,Oculus Quest,Neo Geo CD
135,Shadows of Doubt,2024-09-26,"Role-playing (RPG), Adventure, Indie","PC (Microsoft Windows), PlayStation 5, Xbox Se...",Shadows of Doubt is a detective stealth game s...,70.628078,5.0,//images.igdb.com/igdb/image/upload/t_thumb/co...,2024,2020,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
201,Frostpunk 2,2024-09-20,"Simulator, Strategy, Indie","PC (Microsoft Windows), Mac, PlayStation 5, Xb...",Frostpunk 2 is the sequel to the highly acclai...,81.094314,8.0,//images.igdb.com/igdb/image/upload/t_thumb/co...,2024,2020,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
235,Aura of Worlds,2024-09-19,"Platform, Role-playing (RPG), Strategy, Tactic...","Linux, PC (Microsoft Windows), Mac, Nintendo S...","""Aura of worlds is a creative tactics rogue-li...",70.0,0.0,//images.igdb.com/igdb/image/upload/t_thumb/co...,2024,2020,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
256,The Plucky Squire,2024-09-17,"Platform, Puzzle, Adventure","PC (Microsoft Windows), Nintendo Switch, PlayS...",Storybook characters discover a three-dimensio...,70.080896,8.0,//images.igdb.com/igdb/image/upload/t_thumb/co...,2024,2020,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
346,"Warhammer 40,000: Space Marine II",2024-09-09,"Shooter, Role-playing (RPG), Adventure","PC (Microsoft Windows), PlayStation 5, Xbox Se...",The galaxy is in peril. Entire worlds are fall...,79.727888,27.0,//images.igdb.com/igdb/image/upload/t_thumb/co...,2024,2020,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0


In [None]:
df_clean = df_clean.reset_index(drop=True)
df_clean

Unnamed: 0,name,first_release_date,genres,platforms,summary,rating,rating_count,cover,year,decennie,Unnamed: 11,Tactical,MOBA,Adventure,Turn-based strategy (TBS),Visual Novel,Hack and slash/Beat 'em up,Quiz/Trivia,Music,Pinball,Sport,Point-and-click,Real Time Strategy (RTS),Fighting,Indie,Arcade,Racing,Shooter,Puzzle,Strategy,Card & Board Game,Role-playing (RPG),Platform,Simulator,"PC (Microsoft Windows), Android, PlayStation Portable, Nintendo Switch",PlayStation Vita,Linux,Dreamcast,New Nintendo 3DS,Gear VR,Xbox 360,PlayStation VR2,Windows Phone,Oculus Rift,Game Boy Advance,Neo Geo AES,Super Nintendo Entertainment System,OnLive Game System,Amazon Fire TV,Nintendo Switch,Nintendo 64,Oculus VR,Daydream,Sega Mega Drive/Genesis,PC (Microsoft Windows),Legacy Mobile Device,Android,visionOS,Wii,SteamVR,Xbox One,Nintendo Entertainment System,PlayStation Portable,Mac,Windows Mixed Reality,DVD Player,PlayStation 2,Xbox,Sega Saturn,Web browser,Xbox Series X|S,PlayStation,OOParts,Nintendo GameCube,BlackBerry OS,Wii U,DOS,Google Stadia,Nintendo DS,Leapster,Windows Mobile,PlayStation VR,N-Gage,PlayStation 5,iOS,Neo Geo MVS,Arduboy,PlayStation 3,Meta Quest 3,Meta Quest 2,Ouya,Nintendo 3DS,PlayStation 4,Oculus Quest,Neo Geo CD
0,Shadows of Doubt,2024-09-26,"Role-playing (RPG), Adventure, Indie","PC (Microsoft Windows), PlayStation 5, Xbox Se...",Shadows of Doubt is a detective stealth game s...,70.628078,5.0,//images.igdb.com/igdb/image/upload/t_thumb/co...,2024,2020,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
1,Frostpunk 2,2024-09-20,"Simulator, Strategy, Indie","PC (Microsoft Windows), Mac, PlayStation 5, Xb...",Frostpunk 2 is the sequel to the highly acclai...,81.094314,8.0,//images.igdb.com/igdb/image/upload/t_thumb/co...,2024,2020,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
2,Aura of Worlds,2024-09-19,"Platform, Role-playing (RPG), Strategy, Tactic...","Linux, PC (Microsoft Windows), Mac, Nintendo S...","""Aura of worlds is a creative tactics rogue-li...",70.000000,0.0,//images.igdb.com/igdb/image/upload/t_thumb/co...,2024,2020,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,The Plucky Squire,2024-09-17,"Platform, Puzzle, Adventure","PC (Microsoft Windows), Nintendo Switch, PlayS...",Storybook characters discover a three-dimensio...,70.080896,8.0,//images.igdb.com/igdb/image/upload/t_thumb/co...,2024,2020,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
4,"Warhammer 40,000: Space Marine II",2024-09-09,"Shooter, Role-playing (RPG), Adventure","PC (Microsoft Windows), PlayStation 5, Xbox Se...",The galaxy is in peril. Entire worlds are fall...,79.727888,27.0,//images.igdb.com/igdb/image/upload/t_thumb/co...,2024,2020,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4680,Strikers 1945 II,1997-12-31,"Shooter, Arcade","PC (Microsoft Windows), PlayStation, Sega Satu...","Continuing where the last game ended, the forc...",83.000000,4.0,//images.igdb.com/igdb/image/upload/t_thumb/co...,1997,1990,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0
4681,Putt-Putt Travels Through Time,1997-11-21,"Point-and-click, Adventure","PC (Microsoft Windows), Mac, Android, iOS, Pla...",Mr. Firebird's time machine has gone haywire a...,82.038293,9.0,//images.igdb.com/igdb/image/upload/t_thumb/co...,1997,1990,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0
4682,"Spy Fox in ""Dry Cereal""",1997-10-17,"Point-and-click, Adventure","Wii, PC (Microsoft Windows), Mac, iOS, PlaySta...","The super-suave agent, SPY Fox, must find Mr. ...",79.942740,11.0,//images.igdb.com/igdb/image/upload/t_thumb/co...,1997,1990,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0
4683,Chasm: The Rift,1997-09-30,"Fighting, Shooter","PC (Microsoft Windows), DOS, PlayStation 4, Xb...","Humankind, in the process of evolution, has en...",50.082482,6.0,//images.igdb.com/igdb/image/upload/t_thumb/co...,1997,1990,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0


In [None]:
df_export = df_clean


In [None]:
df_export.to_parquet('df_igdb.parquet', index=False)
files.download('df_igdb.parquet')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
pip install ydata_profiling

Collecting ydata_profiling
  Downloading ydata_profiling-4.10.0-py2.py3-none-any.whl.metadata (20 kB)
Collecting visions<0.7.7,>=0.7.5 (from visions[type_image_path]<0.7.7,>=0.7.5->ydata_profiling)
  Downloading visions-0.7.6-py3-none-any.whl.metadata (11 kB)
Collecting htmlmin==0.1.12 (from ydata_profiling)
  Downloading htmlmin-0.1.12.tar.gz (19 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting phik<0.13,>=0.11.1 (from ydata_profiling)
  Downloading phik-0.12.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB)
Collecting multimethod<2,>=1.4 (from ydata_profiling)
  Downloading multimethod-1.12-py3-none-any.whl.metadata (9.6 kB)
Collecting imagehash==4.3.1 (from ydata_profiling)
  Downloading ImageHash-4.3.1-py2.py3-none-any.whl.metadata (8.0 kB)
Collecting dacite>=1.8 (from ydata_profiling)
  Downloading dacite-1.8.1-py3-none-any.whl.metadata (15 kB)
Collecting PyWavelets (from imagehash==4.3.1->ydata_profiling)
  Downloading pywavelets-1.

In [None]:

# import pour ydata_profiling
import ydata_profiling
from ydata_profiling import ProfileReport
from ydata_profiling.utils.cache import cache_file

# creation d'un rapport exploratoire de notre dataframe

profile = ProfileReport(df_clean, title="Analyse Générale",html={'style': {'full_width': True}})

# Affichage de notre rapport exploratoire :

profile.to_notebook_iframe()


Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

In [None]:
df_clean.shape

(4685, 95)

In [None]:
#px.line(df_total2, x="released", y=df_total2.select_dtypes(include='number').columns, title="Titre")