In [120]:
import requests
import pandas as pd 
import numpy as np

In [131]:
# Create my own fictional dataset. This cell only fabricates sample data; per the original
# note, none of the instructions below are part of the actual solution.

# Fixed seed so that a fresh "Restart & Run All" rebuilds exactly the same table.
RANDOM_SEED = 0

# Random 10 x 6 table of standard-normal draws. A private RandomState is used so the
# global NumPy random state is left untouched.
df = pd.DataFrame(np.random.RandomState(RANDOM_SEED).randn(10, 6))

# Make a few areas have NaN values (iloc slices are end-exclusive: rows 1-2, row 5, rows 7-8).
df.iloc[1:3, 1] = np.nan
df.iloc[5, 3] = np.nan
df.iloc[7:9, 5] = np.nan

# Column 4 becomes a 0/1 flag: truncate each draw to an int, then take |(|x| - 1)|.
# A draw with |x| >= 3 would yield 2 or more, so the result is capped at 1 to keep the
# column strictly binary.
df[4] = (df[4].fillna(0).astype(int).abs() - 1).abs().clip(upper=1)

# Shift the remaining columns into their intended ranges (NaN cells stay NaN).
df[0] = df[0] + 80       # values centred on 80
df[1] = df[1] + 36       # values centred on 36
df[2] = df[2].abs() + 2  # always >= 2
df[3] = df[3].abs() + 2
df[5] = df[5].abs() + 2

In [132]:
# Show the freshly built frame; it has only 10 rows, so it is printed in full.
print(df)

           0          1         2         3  4         5
0  80.914679  35.578959  2.620474  3.349591  1  3.466382
1  78.601556        NaN  4.641759  2.833806  1  3.809029
2  78.973226        NaN  2.444402  2.014080  1  2.466787
3  78.826230  34.596444  2.220137  3.404217  1  2.113818
4  79.996784  35.055696  2.499747  2.101007  1  3.629533
5  78.187394  37.065302  2.246710       NaN  1  2.144251
6  80.803011  35.889693  3.470355  2.071308  0  2.299206
7  80.143999  35.236509  2.052439  3.361548  1       NaN
8  79.826630  35.404296  2.819728  2.043082  1       NaN
9  79.739847  34.940367  3.793548  2.127027  0  3.200789


In [133]:
# Cleaning: swap the default integer column labels for descriptive names

new_columns = [
    'latitude',
    'longitude',
    'order_accuracy',
    'order_speed',
    'likely_to_revisit',
    'customer_satisfied',
]
df.columns = new_columns

print(df)

    latitude  longitude  order_accuracy  order_speed  likely_to_revisit  \
0  80.914679  35.578959        2.620474     3.349591                  1   
1  78.601556        NaN        4.641759     2.833806                  1   
2  78.973226        NaN        2.444402     2.014080                  1   
3  78.826230  34.596444        2.220137     3.404217                  1   
4  79.996784  35.055696        2.499747     2.101007                  1   
5  78.187394  37.065302        2.246710          NaN                  1   
6  80.803011  35.889693        3.470355     2.071308                  0   
7  80.143999  35.236509        2.052439     3.361548                  1   
8  79.826630  35.404296        2.819728     2.043082                  1   
9  79.739847  34.940367        3.793548     2.127027                  0   

   customer_satisfied  
0            3.466382  
1            3.809029  
2            2.466787  
3            2.113818  
4            3.629533  
5            2.144251  
6     

In [134]:
# Cleaning: latitude/longitude serve as the identifiers, so any row missing either one is dropped
identifier_columns = ['latitude', 'longitude']
df.dropna(subset=identifier_columns, axis='index', inplace=True)

print(df)

    latitude  longitude  order_accuracy  order_speed  likely_to_revisit  \
0  80.914679  35.578959        2.620474     3.349591                  1   
3  78.826230  34.596444        2.220137     3.404217                  1   
4  79.996784  35.055696        2.499747     2.101007                  1   
5  78.187394  37.065302        2.246710          NaN                  1   
6  80.803011  35.889693        3.470355     2.071308                  0   
7  80.143999  35.236509        2.052439     3.361548                  1   
8  79.826630  35.404296        2.819728     2.043082                  1   
9  79.739847  34.940367        3.793548     2.127027                  0   

   customer_satisfied  
0            3.466382  
3            2.113818  
4            3.629533  
5            2.144251  
6            2.299206  
7                 NaN  
8                 NaN  
9            3.200789  


In [135]:
# Cleaning: fill missing 'order_speed' values with 3 rather than 0, to balance a fictional 1-5 scale
df['order_speed'] = df['order_speed'].fillna(3)

print(df)

    latitude  longitude  order_accuracy  order_speed  likely_to_revisit  \
0  80.914679  35.578959        2.620474     3.349591                  1   
3  78.826230  34.596444        2.220137     3.404217                  1   
4  79.996784  35.055696        2.499747     2.101007                  1   
5  78.187394  37.065302        2.246710     3.000000                  1   
6  80.803011  35.889693        3.470355     2.071308                  0   
7  80.143999  35.236509        2.052439     3.361548                  1   
8  79.826630  35.404296        2.819728     2.043082                  1   
9  79.739847  34.940367        3.793548     2.127027                  0   

   customer_satisfied  
0            3.466382  
3            2.113818  
4            3.629533  
5            2.144251  
6            2.299206  
7                 NaN  
8                 NaN  
9            3.200789  


In [136]:
# Cleaning: fill missing 'customer_satisfied' values with 3 (not 0), the neutral value used
# to balance a fictional 1-5 scale.
df['customer_satisfied'] = df['customer_satisfied'].fillna(3)

print(df)

    latitude  longitude  order_accuracy  order_speed  likely_to_revisit  \
0  80.914679  35.578959        2.620474     3.349591                  1   
3  78.826230  34.596444        2.220137     3.404217                  1   
4  79.996784  35.055696        2.499747     2.101007                  1   
5  78.187394  37.065302        2.246710     3.000000                  1   
6  80.803011  35.889693        3.470355     2.071308                  0   
7  80.143999  35.236509        2.052439     3.361548                  1   
8  79.826630  35.404296        2.819728     2.043082                  1   
9  79.739847  34.940367        3.793548     2.127027                  0   

   customer_satisfied  
0            3.466382  
3            2.113818  
4            3.629533  
5            2.144251  
6            2.299206  
7            3.000000  
8            3.000000  
9            3.200789  


In [137]:
# Cleaning: the revisit flag is stored as 1/0 integers; cast it to True/False values
revisit_flags = df['likely_to_revisit']
df['likely_to_revisit'] = revisit_flags.astype(bool)

print(df)

    latitude  longitude  order_accuracy  order_speed  likely_to_revisit  \
0  80.914679  35.578959        2.620474     3.349591               True   
3  78.826230  34.596444        2.220137     3.404217               True   
4  79.996784  35.055696        2.499747     2.101007               True   
5  78.187394  37.065302        2.246710     3.000000               True   
6  80.803011  35.889693        3.470355     2.071308              False   
7  80.143999  35.236509        2.052439     3.361548               True   
8  79.826630  35.404296        2.819728     2.043082               True   
9  79.739847  34.940367        3.793548     2.127027              False   

   customer_satisfied  
0            3.466382  
3            2.113818  
4            3.629533  
5            2.144251  
6            2.299206  
7            3.000000  
8            3.000000  
9            3.200789  
