# Preparación de datasets

In [1]:
# importacion general de librerias
import pandas as pd

pd.options.display.float_format = '{:20,.2f}'.format # suprimimos la notacion cientifica en los outputs

import warnings
warnings.filterwarnings('ignore')

In [2]:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

In [3]:
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

In [4]:
#https://drive.google.com/file/d/1y4_n4twjE4VSLSC-V7QsdPJ5JR9fiVgG/view?usp=sharing GooglePlayStore.csv

id='1y4_n4twjE4VSLSC-V7QsdPJ5JR9fiVgG'
downloaded = drive.CreateFile({'id': id})
downloaded.GetContentFile('GooglePlayStore.csv')

Se obtuvieron únicamente las columnas necesarias para la resolución de los problemas en cuestión.

In [5]:
appDetails=pd.read_csv('GooglePlayStore.csv', usecols=['App','Rating','Reviews','Current Ver'])

In [6]:
appDetails

Unnamed: 0,App,Rating,Reviews,Current Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,4.10,159,1.0.0
1,Coloring book moana,3.90,967,2.0.0
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",4.70,87510,1.2.4
3,Sketch - Draw & Paint,4.50,215644,Varies with device
4,Pixel Draw - Number Art Coloring Book,4.30,967,1.1
...,...,...,...,...
10836,Sya9a Maroc - FR,4.50,38,1.48
10837,Fr. Mike Schmitz Audio Teachings,5.00,4,1.0
10838,Parkinson Exercices FR,,3,1.0
10839,The SCP Foundation DB fr nn5n,4.50,114,Varies with device


In [7]:
appDetails.duplicated().value_counts()

False    10350
True       491
dtype: int64

Se droppearon las aplicaciones con el mismo valor en todas las columnas, dado que podrían existir aplicaciones con el mismo nombre.

In [8]:
validAppDetails = appDetails.drop_duplicates()
validAppDetails

Unnamed: 0,App,Rating,Reviews,Current Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,4.10,159,1.0.0
1,Coloring book moana,3.90,967,2.0.0
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",4.70,87510,1.2.4
3,Sketch - Draw & Paint,4.50,215644,Varies with device
4,Pixel Draw - Number Art Coloring Book,4.30,967,1.1
...,...,...,...,...
10836,Sya9a Maroc - FR,4.50,38,1.48
10837,Fr. Mike Schmitz Audio Teachings,5.00,4,1.0
10838,Parkinson Exercices FR,,3,1.0
10839,The SCP Foundation DB fr nn5n,4.50,114,Varies with device


Las aplicaciones sin nombre no son válidas:

In [9]:
validAppDetails = validAppDetails[appDetails['App'].notna()]
validAppDetails

Unnamed: 0,App,Rating,Reviews,Current Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,4.10,159,1.0.0
1,Coloring book moana,3.90,967,2.0.0
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",4.70,87510,1.2.4
3,Sketch - Draw & Paint,4.50,215644,Varies with device
4,Pixel Draw - Number Art Coloring Book,4.30,967,1.1
...,...,...,...,...
10836,Sya9a Maroc - FR,4.50,38,1.48
10837,Fr. Mike Schmitz Audio Teachings,5.00,4,1.0
10838,Parkinson Exercices FR,,3,1.0
10839,The SCP Foundation DB fr nn5n,4.50,114,Varies with device


In [10]:
validAppDetails.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10350 entries, 0 to 10840
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   App          10350 non-null  object 
 1   Rating       8885 non-null   float64
 2   Reviews      10350 non-null  object 
 3   Current Ver  10342 non-null  object 
dtypes: float64(1), object(3)
memory usage: 404.3+ KB


In [11]:
validAppDetails[validAppDetails['Rating']>5]

Unnamed: 0,App,Rating,Reviews,Current Ver
10472,Life Made WI-Fi Touchscreen Photo Frame,19.0,3.0M,4.0 and up


In [12]:
validAppDetails = validAppDetails.drop(10472)
validAppDetails

Unnamed: 0,App,Rating,Reviews,Current Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,4.10,159,1.0.0
1,Coloring book moana,3.90,967,2.0.0
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",4.70,87510,1.2.4
3,Sketch - Draw & Paint,4.50,215644,Varies with device
4,Pixel Draw - Number Art Coloring Book,4.30,967,1.1
...,...,...,...,...
10836,Sya9a Maroc - FR,4.50,38,1.48
10837,Fr. Mike Schmitz Audio Teachings,5.00,4,1.0
10838,Parkinson Exercices FR,,3,1.0
10839,The SCP Foundation DB fr nn5n,4.50,114,Varies with device


Cambiamos el datatype de algunas columnas para mejorar el rendimiento.

In [13]:
validAppDetails['App']=validAppDetails['App'].astype('string')
validAppDetails['Rating']=validAppDetails['Rating'].astype('float')
validAppDetails['Reviews']=validAppDetails['Reviews'].astype('int')
validAppDetails.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10349 entries, 0 to 10840
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   App          10349 non-null  string 
 1   Rating       8884 non-null   float64
 2   Reviews      10349 non-null  int64  
 3   Current Ver  10341 non-null  object 
dtypes: float64(1), int64(1), object(1), string(1)
memory usage: 404.3+ KB


# Ejercicio 15
Para aquellas aplicaciones que se encuentren en su versión 1.0 (o 1.0.0) y que tengan más de 50000 reviews. ¿Cuál es el top 5 con mejor rating? (⭐)

In [14]:
ver1AppDetails = validAppDetails[validAppDetails['Current Ver'].isin(['1.0','1.0.0'])]
ver1AppDetails

Unnamed: 0,App,Rating,Reviews,Current Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,4.10,159,1.0.0
5,Paper flowers instructions,4.40,167,1.0
17,350 Diy Room Decor Ideas,4.50,27,1.0
23,Mcqueen Coloring pages,,61,1.0.0
29,Easy Realistic Drawing Tutorial,4.10,223,1.0
...,...,...,...,...
10823,List iptv FR,,1,1.0
10825,Naruto & Boruto FR,,7,1.0
10834,FR Calculator,4.00,7,1.0.0
10837,Fr. Mike Schmitz Audio Teachings,5.00,4,1.0


In [15]:
ver1Reviews50000AppDetails = ver1AppDetails[ver1AppDetails['Reviews']>50000]
ver1Reviews50000AppDetails

Unnamed: 0,App,Rating,Reviews,Current Ver
1523,Aviary Stickers: Free Pack,3.5,126862,1.0
2012,Racing in Car 2,4.3,234110,1.0
2231,My Little Pony Celebration,4.3,63160,1.0.0
4758,iSwipe Phone X,4.7,58366,1.0
5422,High School Simulator 2017,4.2,123136,1.0
5423,Hopeless Land: Fight for Survival,4.2,250197,1.0
7604,StirFry Stunts - We Bare Bears,4.5,121533,1.0.0
8309,Racing in Car 2,4.3,234589,1.0
9343,My Little Pony Celebration,4.3,63192,1.0.0
10211,Messenger,4.1,68025,1.0


El top 5 con mejor rating entre las aplicaciones de versión 1.0 (o 1.0.0) con más de 50000 reviews:

In [16]:
ver1Reviews50000AppDetails.sort_values(['Rating'], ascending=False).head(5)

Unnamed: 0,App,Rating,Reviews,Current Ver
4758,iSwipe Phone X,4.7,58366,1.0
7604,StirFry Stunts - We Bare Bears,4.5,121533,1.0.0
2012,Racing in Car 2,4.3,234110,1.0
2231,My Little Pony Celebration,4.3,63160,1.0.0
8309,Racing in Car 2,4.3,234589,1.0
