In [38]:
import pandas as pd
import datetime
import re


# 1
def df_nan_filter(df):
    """Apply filters on NaN values
    Args:
        df: pandas dataframe.
    Returns:
        Filtered Dataframe.
    Raises:
        This function shouldn't raise any Exception.
    """
    df.dropna(subset=['Size'], inplace=True)
    df['Languages'].fillna(value="EN", inplace=True)
    df['Price'].fillna(value=0.0, inplace=True)
    avg = df['Average User Rating'].notna().mean()
    df['Average User Rating'].fillna(value=avg, inplace=True)
    df['User Rating Count'].fillna(value=1, inplace=True)
    return df


# 2
def change_date_format(date: str):
    """Change date format from dd/mm/yyyy to yyyy-mm-dd
    Args:
        date: a string representing the date.
    Returns:
        The date in the format yyyy-mm-dd.
    Raises:
        This function shouldn't raise any Exception.
    """
    try:
        newdate = datetime.datetime.strptime(date, '%d/%m/%Y')
        newdate = newdate.strftime('%Y/%m/%d')
        return newdate
    except():
        return date


def string_filter(s: str):
    """Apply filters in order to clean the string.
    Args:
      s: string.
    Returns:
      Filtered String.
    Raises:
      This function shouldn't raise any Exception.
    """
    # filter : \\t, \\n, \\U1a1b2c3d4, \\u1a2b, \\x1a
    # turn \' into '
    # replace remaining \\ with \
    # turn multiple spaces into one space
    s = re.sub(r'''\\+(t|n|U[a-z0-9]{8}|u[a-z0-9]{4}|x[a-z0-9]{2}|[\.]{2})''', ' ', s)
    s = s.replace('\\\'', '\'').replace('\\\\', '\\')
    s = re.sub(r' +', ' ', s)
    return (s)


In [65]:
df = pd.read_csv('../appstore_games.csv')
# 1
dff = df_nan_filter(df)
# 2
dff["Original Release Date"] = dff["Original Release Date"].apply(lambda x: change_date_format(x))
# 3
dff["Description"] = dff["Description"].apply(lambda x: string_filter(x))
# 4
dff.drop_duplicates(subset=df.columns[0], inplace=True)
# 5
dff['Age Rating'] = dff['Age Rating'].apply(lambda x: re.findall(r'\d+', x)[0] )
dff['User Rating Count'] = pd.to_numeric(dff['User Rating Count'], downcast='integer')
dff['Size'] = pd.to_numeric(dff['Size'], downcast='integer')
# 6
indexNames = dff[ dff['Name'].apply(len) < 4 ].index
dff.drop(indexNames, inplace=True)

In [62]:
dff

Unnamed: 0,URL,ID,Name,Subtitle,Icon URL,Average User Rating,User Rating Count,Price,In-app Purchases,Description,Developer,Age Rating,Languages,Size,Primary Genre,Genres,Original Release Date,Current Version Release Date
0,https://apps.apple.com/us/app/sudoku/id284921427,284921427,Sudoku,,https://is2-ssl.mzstatic.com/image/thumb/Purpl...,4.000000,3553,2.99,,"Join over 21,000,000 of our fans and download ...",Mighty Mighty Good Games,4,"DA, NL, EN, FI, FR, DE, IT, JA, KO, NB, PL, PT...",15853568,Games,"Games, Strategy, Puzzle",2008/07/11,30/05/2017
1,https://apps.apple.com/us/app/reversi/id284926400,284926400,Reversi,,https://is4-ssl.mzstatic.com/image/thumb/Purpl...,3.500000,284,1.99,,"The classic game of Reversi, also known as Oth...",Kiss The Machine,4,EN,12328960,Games,"Games, Strategy, Board",2008/07/11,17/05/2018
2,https://apps.apple.com/us/app/morocco/id284946595,284946595,Morocco,,https://is5-ssl.mzstatic.com/image/thumb/Purpl...,3.000000,8376,0.00,,Play the classic strategy game Othello (also k...,Bayou Games,4,EN,674816,Games,"Games, Board, Strategy",2008/07/11,5/09/2017
3,https://apps.apple.com/us/app/sudoku-free/id28...,285755462,Sudoku (Free),,https://is3-ssl.mzstatic.com/image/thumb/Purpl...,3.500000,190394,0.00,,"Top 100 free app for over a year. Rated ""Best ...",Mighty Mighty Good Games,4,"DA, NL, EN, FI, FR, DE, IT, JA, KO, NB, PL, PT...",21552128,Games,"Games, Strategy, Puzzle",2008/07/23,30/05/2017
4,https://apps.apple.com/us/app/senet-deluxe/id2...,285831220,Senet Deluxe,,https://is1-ssl.mzstatic.com/image/thumb/Purpl...,3.500000,28,2.99,,"""Senet Deluxe - The Ancient Game of Life and A...",RoGame Software,4,"DA, NL, EN, FR, DE, EL, IT, JA, KO, NO, PT, RU...",34689024,Games,"Games, Strategy, Board, Education",2008/07/18,22/07/2018
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17002,https://apps.apple.com/us/app/stack-puzzle-ris...,1474626442,Stack Puzzle : Rise Tower,"Blast the cubes, solve puzzle!",https://is5-ssl.mzstatic.com/image/thumb/Purpl...,0.444608,1,0.00,,"The goal is very simple, move the square horiz...",Zhigang Pei,4,EN,64795648,Games,"Games, Entertainment, Casual, Strategy",2019/07/30,30/07/2019
17003,https://apps.apple.com/us/app/eachother/id1474...,1474919257,EachOther,,https://is2-ssl.mzstatic.com/image/thumb/Purpl...,0.444608,1,0.00,,Collect a score while you play!! By linking ch...,Sultan Shindi,4,EN,110341120,Games,"Games, Family, Strategy",2019/08/01,1/08/2019
17004,https://apps.apple.com/us/app/rabbit-vs-tortoi...,1474962324,Rabbit Vs Tortoise,,https://is2-ssl.mzstatic.com/image/thumb/Purpl...,0.444608,1,0.00,,"""Rabbit Vs Tortoise is chess type cool simple ...",Vishal Baldha,4,EN,23207936,Games,"Games, Strategy",2019/08/03,3/08/2019
17005,https://apps.apple.com/us/app/fatall/id1474963671,1474963671,FaTaLL,Most fun game!!!,https://is1-ssl.mzstatic.com/image/thumb/Purpl...,0.444608,1,0.00,"9.99, 49.99, 3.99",Upgrade your character and use your skills to ...,Tayrem Games,4,EN,196750336,Games,"Games, Strategy, Action",2019/08/01,1/08/2019
