## Project Milestone 2 DSC 420 Cleaning/Formating Flat File Source
create by: David Hatchett  
created: 2024-01-18  

In [1]:
import pandas as pd
import numpy as np
import json
import sqlite3

from datetime import datetime

### Functions

In [210]:
## Cell holds all the functions I used to process the flat file.

def create_stats_data(line_data:dict) -> pd.DataFrame:
    '''
    The amount of stats recorded for each game is inconsistent. The idea
    is to break up the data for each stat into one line that will end up
    in a database table. I can then use all the tables to create a 
    final data set.

    the stats are 3 levels deep. each level can 
    more than one value. This is why we are 
    using the loop below. This is a lot 
    hard to understand in list compersion format
    '''
    stats_data = list()

    for k, v in line_data['Stats'].items():
        for k2, v2 in v.items():
            for k3, v3 in v2.items():
                tt_dict = dict()
                tt_dict['name'] = data['Name']
                tt_dict['release_date_index'] = data['Release_date']
                tt_dict['category'] = k
                tt_dict['sub_cat'] = k2
                tt_dict['sub_cat2'] = k3

                if k3 != 'Polled':
                    v3 = convert_time(v3)
                tt_dict['value'] = v3
                stats_data.append(tt_dict)
                
                
    return pd.DataFrame(stats_data)  

def create_genres_lists(line_data:dict) -> pd.DataFrame:
    '''
    Split the genre data into a list to create 1 line per genre.
    This will allow me to create a more targeted data set later.
    The same issue is true with the Stats: too many with 1 to many 
    makes it hard to categorize unless I can pick and choose the 
    correct ones.
    '''
    genres = list()
    for value in line_data['Genres'].split(','):
        iteam_dic=dict()
        iteam_dic['name'] = line_data['Name']
        iteam_dic['release_date_index'] = data['Release_date']
        iteam_dic['genres'] = value
        genres.append(iteam_dic)

    return pd.DataFrame(genres)


def convert_time(value:str) -> float:
    '''
    Conver the 10H 53M format into only minutes
    takes a line in and finds the values before 
    the h and the m and calculate the total time 
    in minutes.

    '''
    hrs_index = value.find('h')
    mins_index = value.find('m')

    hrs = 0
    mins = 0
    
    if hrs_index != -1:
        if mins_index != -1:
            mins = int(value[hrs_index+1:mins_index])
        hrs = int(value[0:hrs_index])
        hrs = hrs * 60
        
    return hrs + mins

def fix_date(row):
    '''
    The dates are stored in a weird format.
    this converts the dates column to an 
    actual DateTime object. if it's an 
    invalid format for a date returns 
    np.Nan
    '''

    if row == None:
        return np.nan
    elif row[0:4] == '0000':
         return np.nan
    elif row[-2:] == '00':
        return np.nan
    elif row[5:7] == '00':
        return np.nan
    else:
       return datetime.strptime(row, '%Y-%m-%d').date()
        
def exstact_year(row):
    '''
    extracts the 4-digit year from the date 
    column. if a type error occurs, pass back
    a np.nan
    '''
    try:
        return int(row[0:4]) if int(row[0:4]) !=0 else np.nan
    except TypeError as er:
        return np.nan

def clean_up_stage_tabels():
     with sqlite3.connect("game_data.db") as con:
         cur = con.cursor()
         cur.execute('DROP TABLE IF EXISTS game_stats_stage')
         cur.execute('DROP TABLE IF EXISTS genres_stage')
         cur.execute('Drop TABLE IF EXISTS game_list_stage')
         cur.close()

def set_up_ddl():
    '''
    checks the database and creates the prod 
    tables if they do not exist already
    '''
    with sqlite3.connect("game_data.db") as con:
        cur = con.cursor()
        cur.execute("""
        CREATE TABLE IF NOT EXISTS ttb_game_list (
            id INTEGER PRIMARY KEY AUTOINCREMENT
            , game_name VARCHAR
            , relase_date_key VARVCHAR(10)
            , steam_app_id INTEGER
            , review_score REAL
            , relase_date DATE
            , relase_year INTEGER
        );""")
    
        cur.execute("""
        CREATE TABLE IF NOT EXISTS ttb_game_genres (
            id INTEGER PRIMARY KEY AUTOINCREMENT
            , ttb_game_id INTEGER
            , genres VARCHAR
        );""")
    
        cur.execute("""
        CREATE TABLE IF NOT EXISTS ttb_game_stats (
            id INTEGER PRIMARY KEY AUTOINCREMENT
            , ttb_game_id INTEGER
            , category VARCHAR
            , sub_category_1 VARCHAR
            , sub_category_2 VARCHAR
            , value REAL
        );""")    


### Prep the Database Enviroment

In [243]:
## clean up envroment prior to run
clean_up_stage_tabels()
set_up_ddl()

### Load the Data and Database

In [156]:
## Create the primary datasets from the flat file
## also loads the stats and genres strait to the DB 
## Data gets rather large to keep in a data frame
## We can recomb later when needed.

## this takes about a minute or so to run. if it starts to take too long look at trying to batch up more records before writeing 
## to the database. for now, it's not too bad, about 2 to 5 mins. you can also commit out the loads to DB if you know nothing has 
## changed.


game_list = list()

#open db connection
with sqlite3.connect("game_data.db") as con:

    #open file connection
    with open('data/hltb.jsonlines','r') as flt_fl:

        #process each line
        for line in flt_fl:
            data = json.loads(line)

            ## this is Transform 1 splitting json list into a game list
            game_line_data = dict([(key,value) for key, value in data.items() if key not in ('Stats', 'Genres')])
            game_list.append(game_line_data)

            ## this is Transform 2: splitting and creating the stats data
            create_stats_data(data).to_sql('game_stats_stage',con,if_exists='append')

            ## this is transform 3: splitting and creating the genres data
            create_genres_lists(data).to_sql('genres_stage',con,if_exists='append')

In [157]:
#put the data sets into pandas
game_list_df = pd.DataFrame(game_list)

# this is Transform 4: lowercasing the columns to make them easier to work with
game_list_df.columns = game_list_df.columns.str.lower()


print(f'the game_list_df dataframe has a length of {len(game_list_df)} and looks like :')
game_list_df.head()

the game_list_df dataframe has a length of 60409 and looks like :


Unnamed: 0,name,steam_app_id,release_date,review_score
0,Borderlands 3,397540,2019-09-13,76
1,Dying Light,239140,2015-01-27,78
2,Middle-Earth: Shadow of War,356190,2017-10-10,76
3,Counter-Strike: Global Offensive,730,2012-08-21,74
4,Grand Theft Auto IV,5152,2008-04-29,82


### Null Review

In [158]:
# Lets look at what Null values exist
game_list_df.isnull().sum()

name                0
steam_app_id    27402
release_date        2
review_score        0
dtype: int64

We see that there a lot of null's in the Steam App Id field this is ok. Some games on this list are so old they can't run on modern hardware and where never avaliable in steam.

### Review_score Review

In [159]:
game_list_df['review_score'].value_counts()

review_score
0     23323
70     3779
60     3444
80     2252
50     2240
      ...  
16        3
96        2
98        1
12        1
19        1
Name: count, Length: 90, dtype: int64

In [160]:
game_list_df['review_score'].describe()

count    60409.000000
mean        38.484067
std         33.237827
min          0.000000
25%          0.000000
50%         50.000000
75%         70.000000
max        100.000000
Name: review_score, dtype: float64

The zeros seem to represent games that did not get a review and they make up a large amount of our data about 38%. Lets set the the zeros to nan incase we deciced to do anything with the values.

In [161]:
## Transform 5: Updating the zeros to np.nan
game_list_df['review_score'] = game_list_df['review_score'].replace(0,np.nan)

In [162]:
game_list_df['review_score'].describe()

count    37086.000000
mean        62.686297
std         16.803290
min         10.000000
25%         54.000000
50%         65.000000
75%         74.000000
max        100.000000
Name: review_score, dtype: float64

This looks a lot better now that we are ingoring the 0. 

### Review of release_date

In [163]:
game_list_df['release_date'].value_counts()

release_date
0000-00-00    3110
1994-00-00     365
1993-00-00     325
1992-00-00     311
1991-00-00     310
              ... 
2015-04-19       1
2019-09-28       1
1988-10-21       1
2022-03-13       1
1987-03-14       1
Name: count, Length: 9321, dtype: int64

It looks like we have a chuck of games without a firm release date and a chuck of games with only the year or year and month in the date.
Let's create some new fields 1. and an actual data time object field for the records we can. 2. a Release year field. In both these fields, the unknown dates will be np.nan.

In [164]:
#transform 6 Create an acutal date field and a year field
game_list_df['release_date_datetime']= game_list_df['release_date'].apply(fix_date)

#transformation 7 Create a realse year column may be easier to use.
game_list_df['release_year'] = game_list_df['release_date'].apply(exstact_year)


In [165]:
game_list_df['release_date_datetime'].describe()

count          49120
unique          8787
top       2016-04-05
freq              61
Name: release_date_datetime, dtype: object

In [166]:
game_list_df['release_year'].describe()

count    57286.000000
mean      2010.610533
std         10.199731
min       1962.000000
25%       2005.000000
50%       2015.000000
75%       2018.000000
max       2024.000000
Name: release_year, dtype: float64

The list seems resonable after preforming a google search we see the first video game was released in 1958.

### Review of Name

In [167]:
## find all the duplicates names
dups = game_list_df.duplicated(subset=['name','release_date'], keep=False)

In [168]:
game_list_df[dups].sort_values(by='name')

Unnamed: 0,name,steam_app_id,release_date,review_score,release_date_datetime,release_year
58718,Asylum,230210.0,0000-00-00,,,
56163,Asylum,,0000-00-00,80.0,,
20101,Aventura Copilului Albastru și Urât,1530140.0,2021-02-08,100.0,2021-02-08,2021.0
24373,Aventura Copilului Albastru și Urât,1530140.0,2021-02-08,,2021-02-08,2021.0
22100,Bakumatsu Korinden Oni,,1996-02-02,50.0,1996-02-02,1996.0
58897,Bakumatsu Korinden Oni,,1996-02-02,,1996-02-02,1996.0
8960,Dawn,605610.0,2017-03-31,70.0,2017-03-31,2017.0
42231,Dawn,605610.0,2017-03-31,68.0,2017-03-31,2017.0
15627,Dream T.V.,,1994-04-00,,,1994.0
39659,Dream T.V.,,1994-04-00,,,1994.0


lets keep the first records

In [169]:
dups2 = game_list_df.duplicated(subset=['name','release_date'], keep='first')
game_list_df[dups2].sort_values(by='name')

Unnamed: 0,name,steam_app_id,release_date,review_score,release_date_datetime,release_year
58718,Asylum,230210.0,0000-00-00,,,
24373,Aventura Copilului Albastru și Urât,1530140.0,2021-02-08,,2021-02-08,2021.0
58897,Bakumatsu Korinden Oni,,1996-02-02,,1996-02-02,1996.0
42231,Dawn,605610.0,2017-03-31,68.0,2017-03-31,2017.0
39659,Dream T.V.,,1994-04-00,,,1994.0
57348,Exile,,1988-12-00,,,1988.0
39107,Front Mission: Gun Hazard,,1996-02-23,,1996-02-23,1996.0
59019,Mare,1875560.0,2022-03-31,,2022-03-31,2022.0
28857,Mystery Case Files: The Black Veil,580010.0,2017-03-23,60.0,2017-03-23,2017.0
41445,PDC World Championship Darts: Pro Tour,,2010-11-26,,2010-11-26,2010.0


In [170]:
game_list_df = game_list_df.drop(dups2[dups2].index)
game_list_df

Unnamed: 0,name,steam_app_id,release_date,review_score,release_date_datetime,release_year
0,Borderlands 3,397540,2019-09-13,76.0,2019-09-13,2019.0
1,Dying Light,239140,2015-01-27,78.0,2015-01-27,2015.0
2,Middle-Earth: Shadow of War,356190,2017-10-10,76.0,2017-10-10,2017.0
3,Counter-Strike: Global Offensive,730,2012-08-21,74.0,2012-08-21,2012.0
4,Grand Theft Auto IV,5152,2008-04-29,82.0,2008-04-29,2008.0
...,...,...,...,...,...,...
60404,Left-Hand Path,488760,2016-06-20,,2016-06-20,2016.0
60405,Astro Attack,,1984-00-00,,,1984.0
60406,Apocalypse: Party's Over,368800,2016-05-30,,2016-05-30,2016.0
60407,10mg :),1425380,2020-10-15,78.0,2020-10-15,2020.0


### Load Games list Data to the Database

In [171]:
with sqlite3.connect("game_data.db") as con:
    game_list_df.to_sql('game_list_stage',con,if_exists='append')

### Review the Stage data prior to Load to Prod tables

#### genres_stage Review

In [251]:
con = sqlite3.connect("game_data.db") 
cur = con.cursor()

In [205]:
cols = cur.execute('PRAGMA table_info(genres_stage);')
for i in cols.fetchall():
    print(i)

(0, 'index', 'INTEGER', 0, None, 0)
(1, 'name', 'TEXT', 0, None, 0)
(2, 'release_date_index', 'TEXT', 0, None, 0)
(3, 'genres', 'TEXT', 0, None, 0)


In [206]:
records = cur.execute("""
WITH dups as (
    SELECT 
        name
        , release_date_index
        , genres              
    FROM genres_stage
    GROUP BY
        name
        , release_date_index
        , genres 
        HAVING COUNT(*) > 1
    )
SELECT
    A.name
    , A.release_date_index
    , A.genres  
FROM genres_stage AS A 
    INNER JOIN dups AS B
        ON 
            A.name = b.name
            AND A.release_date_index = B.release_date_index
            AND A.genres = B.genres
ORDER BY 
    A.name
    , A.release_date_index
    , A.genres
""")

In [207]:
for i in records.fetchall():
    print(i)

('Bakumatsu Korinden Oni', '1996-02-02', 'Role-Playing')
('Bakumatsu Korinden Oni', '1996-02-02', 'Role-Playing')
("Crazy Chicken: The Pharaoh's Treasure", '2009-06-02', ' Adventure')
("Crazy Chicken: The Pharaoh's Treasure", '2009-06-02', ' Adventure')
('Dawn', '2017-03-31', 'Adventure')
('Dawn', '2017-03-31', 'Adventure')
('Dream T.V.', '1994-04-00', 'Action')
('Dream T.V.', '1994-04-00', 'Action')
('King Arthur: The Druids', '2011-02-03', ' Strategy/Tactical')
('King Arthur: The Druids', '2011-02-03', ' Strategy/Tactical')
('King Arthur: The Saxons', '2009-11-24', ' Strategy/Tactical')
('King Arthur: The Saxons', '2009-11-24', ' Strategy/Tactical')
('Mare', '2022-03-31', ' Adventure')
('Mare', '2022-03-31', ' Adventure')
('Mare', '2022-03-31', 'Virtual Reality')
('Mare', '2022-03-31', 'Virtual Reality')
('Mystery Case Files: The Black Veil', '2017-03-23', 'Hidden Object')
('Mystery Case Files: The Black Veil', '2017-03-23', 'Hidden Object')
('PDC World Championship Darts: Pro Tour',

These can just be distincted on insert not issue here

#### game_stats_stage Review

In [173]:
cols = cur.execute('PRAGMA table_info(game_stats_stage);')
for i in cols.fetchall():
    print(i)

(0, 'index', 'INTEGER', 0, None, 0)
(1, 'name', 'TEXT', 0, None, 0)
(2, 'release_date_index', 'TEXT', 0, None, 0)
(3, 'category', 'TEXT', 0, None, 0)
(4, 'sub_cat', 'TEXT', 0, None, 0)
(5, 'sub_cat2', 'TEXT', 0, None, 0)
(6, 'value', 'TEXT', 0, None, 0)


In [203]:
records = cur.execute("""
WITH dups as (
    SELECT 
        name
        , release_date_index
        ,  category
        , sub_cat
        , sub_cat2                       
    FROM game_stats_stage
    GROUP BY
        name
        , release_date_index
        ,  category
        , sub_cat
        , sub_cat2
        HAVING COUNT(*) > 1
    )
SELECT
    A."index"
    , A.name
    , A.release_date_index
    , A.category
    , A.sub_cat
    , A.sub_cat2  
    , a.value
FROM game_stats_stage AS A 
    INNER JOIN dups AS B
        ON 
            A.name = b.name
            AND A.release_date_index = B.release_date_index
            AND A.category = B.category
            AND A.sub_cat = B.sub_cat
            AND A.sub_cat2 = B.sub_cat2
ORDER BY 
    A.name
    , a.release_date_index
    , a.category
    , a.sub_cat
    , a.sub_cat2
    , a."index"
                        """)

In [204]:
for i in records.fetchall():
    print(i)

(13, 'Dawn', '2017-03-31', 'Platform', 'PC', '100%', '76')
(28, 'Dawn', '2017-03-31', 'Platform', 'PC', '100%', '85')
(14, 'Dawn', '2017-03-31', 'Platform', 'PC', 'Fastest', '73')
(29, 'Dawn', '2017-03-31', 'Platform', 'PC', 'Fastest', '0')
(11, 'Dawn', '2017-03-31', 'Platform', 'PC', 'Main', '0')
(26, 'Dawn', '2017-03-31', 'Platform', 'PC', 'Main', '0')
(12, 'Dawn', '2017-03-31', 'Platform', 'PC', 'Main +', '0')
(27, 'Dawn', '2017-03-31', 'Platform', 'PC', 'Main +', '0')
(10, 'Dawn', '2017-03-31', 'Platform', 'PC', 'Polled', '2')
(25, 'Dawn', '2017-03-31', 'Platform', 'PC', 'Polled', '19')
(15, 'Dawn', '2017-03-31', 'Platform', 'PC', 'Slowest', '79')
(30, 'Dawn', '2017-03-31', 'Platform', 'PC', 'Slowest', '180')
(6, 'Dawn', '2017-03-31', 'Single-Player', 'All PlayStyles', 'Average', '76')
(16, 'Dawn', '2017-03-31', 'Single-Player', 'All PlayStyles', 'Average', '0')
(9, 'Dawn', '2017-03-31', 'Single-Player', 'All PlayStyles', 'Leisure', '77')
(19, 'Dawn', '2017-03-31', 'Single-Player',

We should be able to use a windows function for the insert here. use a rank by the fields and then the values

### Load Data into Prod Tables

In [246]:
with sqlite3.connect("game_data.db") as con:
    cur = con.cursor()
    cur.execute("""
        INSERT INTO ttb_game_list (game_name,relase_date_key,steam_app_id,review_score,relase_date,relase_year)
        SELECT A.name, A.release_date, A.steam_app_id, A.review_score, A.release_date_datetime, A.release_year
        FROM game_list_stage AS A
            LEFT JOIN ttb_game_list AS B
                ON A.name = B.game_name
                AND IFNULL(A.release_date,'') = IFNULL(B.relase_date_key,'')
        WHERE
            b.game_name IS NULL;  
    """)

    cur.execute("""
        INSERT INTO ttb_game_genres (ttb_game_id, genres)
        SELECT DISTINCT
            b.id
            , a.genres
        FROM genres_stage A
            INNER JOIN ttb_game_list AS B
                ON A.name = B.game_name
                AND IFNULL(A.release_date_index,'') = IFNULL(B.relase_date_key,'')
            LEFT JOIN ttb_game_genres AS C
                ON b.id = c.ttb_game_id
                AND A.genres = C.genres
        WHERE c.ttb_game_id IS NULL
    """)

    cur.execute("""
        INSERT INTO ttb_game_stats (ttb_game_id, category, sub_category_1, sub_category_2 , value)
        WITH rcrd_cln AS (
            SELECT
                A.name
                , A.release_date_index
                , A.category
                , A.sub_cat
                , A.sub_cat2  
                , a.value
                , ROW_NUMBER() OVER(PARTITION BY A.name, a.release_date_index, a.category, a.sub_cat, a.sub_cat2 ORDER BY A.value desc) as rwnm
            FROM game_stats_stage AS A 
        )
        SELECT
            b.id
            , a.category
            , a.sub_cat
            , a.sub_cat2
            , a.value
        FROM rcrd_cln AS A
            INNER JOIN ttb_game_list AS B
                ON A.name = B.game_name
                AND IFNULL(A.release_date_index,'') = IFNULL(B.relase_date_key,'')
            LEFT JOIN ttb_game_stats AS C
                ON B.id  = C.ttb_game_id 
                AND a.category = c.category
                AND a.sub_cat = c.sub_category_1
                AND A.sub_cat2 = C.sub_category_2
        WHERE
            c.ttb_game_id IS NULL
            AND a.rwnm = 1
    """)

In [248]:
records =  cur.execute("""
    SELECT 1 INDX, 'ttb_game_list' AS SRC, COUNT(*) AS CNT FROM ttb_game_list
    UNION SELECT 2 INDX, 'game_list_stage' AS SRC, COUNT(*) AS CNT FROM game_list_stage
    UNION SELECT 3 INDX, 'ttb_game_genres' AS SRC, COUNT(*) AS CNT FROM ttb_game_genres    
    UNION SELECT 4 INDX, 'genres_stage' AS SRC, COUNT(*) AS CNT FROM genres_stage
    UNION SELECT 5 INDX,  'ttb_game_stats' AS SRC, COUNT(*) AS CNT FROM ttb_game_stats
    UNION SELECT 6 INDX,'game_stats_stage' AS SRC, COUNT(*) AS CNT FROM game_stats_stage
    ORDER BY INDX;
    """)

print('counts on all prod tables and stage')
for i in records.fetchall():
    print(i)

counts on all prod tables and stage
(1, 'ttb_game_list', 60397)
(2, 'game_list_stage', 60397)
(3, 'ttb_game_genres', 97431)
(4, 'genres_stage', 97442)
(5, 'ttb_game_stats', 974973)
(6, 'game_stats_stage', 974999)


Values are in the exspected range after removel of duplicates.

### Pull out the record and print it out for the assigment

In [252]:
sql_txt ="""
            SELECT 
                A.id
                , A.game_name
                , A.steam_app_id
                , A.review_score
                , A.relase_date
                , A.relase_year
                , B.genres
                , C.category
                , C.sub_category_1
                , C.sub_category_2
                , C.value
            FROM ttb_game_list AS A
                LEFT JOIN ttb_game_genres B
                    ON A.ID = B.ttb_game_id
                LEFT JOIN ttb_game_stats C
                    ON A.ID = C.ttb_game_id
            LIMIT 100;
    """

fln_df = pd.read_sql(sql_txt,con)
fln_df

Unnamed: 0,id,game_name,steam_app_id,review_score,relase_date,relase_year,genres,category,sub_category_1,sub_category_2,value
0,1,Borderlands 3,397540,76.0,2019-09-13,2019,Action,Additional Content,Bounty of Blood,100%,480.0
1,1,Borderlands 3,397540,76.0,2019-09-13,2019,Action,Additional Content,Bounty of Blood,Main,420.0
2,1,Borderlands 3,397540,76.0,2019-09-13,2019,Action,Additional Content,Bounty of Blood,Main+,660.0
3,1,Borderlands 3,397540,76.0,2019-09-13,2019,Action,Additional Content,Bounty of Blood,Polled,74%
4,1,Borderlands 3,397540,76.0,2019-09-13,2019,Action,Additional Content,Bounty of Blood,Rated,300.0
...,...,...,...,...,...,...,...,...,...,...,...
95,1,Borderlands 3,397540,76.0,2019-09-13,2019,Shooter,Additional Content,"Guns, Love, and Tentacles",Rated,360.0
96,1,Borderlands 3,397540,76.0,2019-09-13,2019,Shooter,Additional Content,Moxxi's Heist of the Handsome Jackpot,100%,540.0
97,1,Borderlands 3,397540,76.0,2019-09-13,2019,Shooter,Additional Content,Moxxi's Heist of the Handsome Jackpot,Main,540.0
98,1,Borderlands 3,397540,76.0,2019-09-13,2019,Shooter,Additional Content,Moxxi's Heist of the Handsome Jackpot,Main+,660.0


### Clean up the stage tables

In [253]:
cur.close()
con.close()
clean_up_stage_tabels()

## Ethical Considerations

Overall, for my data set, there are no major ethical or regulatory concerns around my data and the transformations I have completed. I should still strive to represent the data as correctly as possible. Most of my transformations were structural and will likely match how the data was initially stored. My primary changes were to unpack the Stats and Genre data into individual rows. This will allow me to mix and match easily as I continue the project. The only change I performed that could misrepresent the data is cleaning up the duplicates from the Stats data. I chose to take the highest number in the value column. However, there isn't much more I can use to verify which one is correct.  

I’m unsure of how credible any dataset on Kaggle is; it depends on the creator's methods, and I believe all data from the source should be suspect when working on research projects unless you can verify the appropriate collection method. For this assignment, it's for a fun exercise, so the susceptibility of the data is not a concern.
