In [1]:
import pandas as pd
import os

In [2]:
shopping_data_file = os.path.join("Resources", "shopping_data.csv")
shopping_df = pd.read_csv(shopping_data_file)

In [3]:
shopping_df.head(10)

Unnamed: 0,CustomerID,Card Member,Age,Annual Income,Spending Score (1-100)
0,1,Yes,19.0,15000,39.0
1,2,Yes,21.0,15000,81.0
2,3,No,20.0,16000,6.0
3,4,No,23.0,16000,77.0
4,5,No,31.0,17000,40.0
5,6,No,22.0,17000,76.0
6,7,No,35.0,18000,6.0
7,8,No,23.0,18000,94.0
8,9,Yes,64.0,19000,3.0
9,10,No,30.0,19000,72.0


In [4]:
shopping_df.columns

Index(['CustomerID', 'Card Member', 'Age', 'Annual Income',
       'Spending Score (1-100)'],
      dtype='object')

In [5]:
shopping_df.dtypes

CustomerID                  int64
Card Member                object
Age                       float64
Annual Income               int64
Spending Score (1-100)    float64
dtype: object

In [6]:
shopping_df.isnull().sum()

CustomerID                0
Card Member               2
Age                       2
Annual Income             0
Spending Score (1-100)    1
dtype: int64

In [7]:
# Drop null rows
shopping_df_no_null = shopping_df.dropna()

In [8]:
shopping_df_no_null.isnull().sum()

CustomerID                0
Card Member               0
Age                       0
Annual Income             0
Spending Score (1-100)    0
dtype: int64

In [9]:
shopping_df_no_null.duplicated().sum()

0

In [10]:
shopping_df_no_null.drop(columns=["CustomerID"], inplace=True)
shopping_df_no_null.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Unnamed: 0,Card Member,Age,Annual Income,Spending Score (1-100)
0,Yes,19.0,15000,39.0
1,Yes,21.0,15000,81.0
2,No,20.0,16000,6.0
3,No,23.0,16000,77.0
4,No,31.0,17000,40.0


In [11]:
shopping_df_clean = shopping_df_no_null
shopping_df_clean.head()

Unnamed: 0,Card Member,Age,Annual Income,Spending Score (1-100)
0,Yes,19.0,15000,39.0
1,Yes,21.0,15000,81.0
2,No,20.0,16000,6.0
3,No,23.0,16000,77.0
4,No,31.0,17000,40.0


In [12]:
# Transform String column
def change_string(card_member):
    if card_member == "Yes":
        return 1
    else:
        return 0

shopping_df_clean["Card Member"] = shopping_df_clean["Card Member"].apply(change_string)
shopping_df_clean.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,Card Member,Age,Annual Income,Spending Score (1-100)
0,1,19.0,15000,39.0
1,1,21.0,15000,81.0
2,0,20.0,16000,6.0
3,0,23.0,16000,77.0
4,0,31.0,17000,40.0


In [13]:
shopping_df_clean["Annual Income"] = shopping_df_clean["Annual Income"] / 1000
shopping_df_clean.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,Card Member,Age,Annual Income,Spending Score (1-100)
0,1,19.0,15.0,39.0
1,1,21.0,15.0,81.0
2,0,20.0,16.0,6.0
3,0,23.0,16.0,77.0
4,0,31.0,17.0,40.0


In [20]:
shopping_df_clean.columns

Index(['Card Member', 'Age', 'Annual Income', 'Spending Score (1-100)'], dtype='object')

In [24]:
shopping_df_clean.rename(columns={'Card Member': 'card_member'}, errors="raise", inplace=True)
shopping_df_clean.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Unnamed: 0,card_member,Age,Annual Income,Spending Score (1-100)
0,1,19.0,15.0,39.0
1,1,21.0,15.0,81.0
2,0,20.0,16.0,6.0
3,0,23.0,16.0,77.0
4,0,31.0,17.0,40.0


In [25]:
shopping_df_clean.rename(columns={'Age': 'age', 
                                  'Annual Income': 'annual_income', 
                                  'Spending Score (1-100)': 'spending_score'}, inplace=True)
shopping_df_clean.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Unnamed: 0,card_member,age,annual_income,spending_score
0,1,19.0,15.0,39.0
1,1,21.0,15.0,81.0
2,0,20.0,16.0,6.0
3,0,23.0,16.0,77.0
4,0,31.0,17.0,40.0


In [26]:
# Save cleaned data
file_path = os.path.join("Resources", "clean_shopping_data.csv")
shopping_df_clean.to_csv(file_path, index=False)