<a href="https://colab.research.google.com/github/claytonmclamb/Spaceship-Titanic/blob/main/ReplacedMissing/DataCleaningFilled.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data Cleaning Notebook
This notebook cleans our training and testing data for EDA and the development of our models!

In [None]:
# Loading Packages
import pandas as pd
import numpy as np
import random

## Loading Data

In [None]:
#Loading Training Data from the project's GitHub repository
url = "https://raw.githubusercontent.com/claytonmclamb/Spaceship-Titanic/main/train.csv"
train = pd.read_csv(url)
#Previewing the first rows to confirm the file parsed as expected
train.head()

Unnamed: 0,PassengerId,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Name,Transported
0,0001_01,Europa,False,B/0/P,TRAPPIST-1e,39.0,False,0.0,0.0,0.0,0.0,0.0,Maham Ofracculy,False
1,0002_01,Earth,False,F/0/S,TRAPPIST-1e,24.0,False,109.0,9.0,25.0,549.0,44.0,Juanna Vines,True
2,0003_01,Europa,False,A/0/S,TRAPPIST-1e,58.0,True,43.0,3576.0,0.0,6715.0,49.0,Altark Susent,False
3,0003_02,Europa,False,A/0/S,TRAPPIST-1e,33.0,False,0.0,1283.0,371.0,3329.0,193.0,Solam Susent,False
4,0004_01,Earth,False,F/1/S,TRAPPIST-1e,16.0,False,303.0,70.0,151.0,565.0,2.0,Willy Santantines,True


In [None]:
#Loading Testing Data from the project's GitHub repository
#NOTE: url is reused here, so after this cell it points at the test file
url = "https://raw.githubusercontent.com/claytonmclamb/Spaceship-Titanic/main/test.csv"
test = pd.read_csv(url)
#Previewing the first rows to confirm the file parsed as expected
test.head()

Unnamed: 0,PassengerId,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Name
0,0013_01,Earth,True,G/3/S,TRAPPIST-1e,27.0,False,0.0,0.0,0.0,0.0,0.0,Nelly Carsoning
1,0018_01,Earth,False,F/4/S,TRAPPIST-1e,19.0,False,0.0,9.0,0.0,2823.0,0.0,Lerome Peckers
2,0019_01,Europa,True,C/0/S,55 Cancri e,31.0,False,0.0,0.0,0.0,0.0,0.0,Sabih Unhearfus
3,0021_01,Europa,False,C/1/S,TRAPPIST-1e,38.0,False,0.0,6652.0,0.0,181.0,585.0,Meratz Caltilter
4,0023_01,Earth,False,F/5/S,TRAPPIST-1e,20.0,False,10.0,0.0,635.0,0.0,0.0,Brence Harperez


## Cleaning Data

### Passenger ID
Passenger ID has two parts separated by an underscore (e.g. `0003_02`): the first indicates the passenger's group and the second indicates their position within that group. I will extract both into separate columns.


In [None]:
#Getting the Group
def getGroup(s):
  """Return the group part of a PassengerId: the text before the first "_".

  Assumes s contains an underscore (e.g. "0003_02" gives "0003").
  """
  return s[:s.find("_")]
#Getting the Position in the group
def getPos(s):
  """Return the position part of a PassengerId: the text after the first "_".

  For example "0003_02" gives "02".
  """
  return s[s.find("_") + 1:]

In [None]:
#Adding a Group column (taken from PassengerId) to both the train and test data
for frame in (train, test):
  frame["Group"] = frame["PassengerId"].apply(getGroup)
#Ensuring it works
train.head()

Unnamed: 0,PassengerId,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Name,Transported,Group
0,0001_01,Europa,False,B/0/P,TRAPPIST-1e,39.0,False,0.0,0.0,0.0,0.0,0.0,Maham Ofracculy,False,1
1,0002_01,Earth,False,F/0/S,TRAPPIST-1e,24.0,False,109.0,9.0,25.0,549.0,44.0,Juanna Vines,True,2
2,0003_01,Europa,False,A/0/S,TRAPPIST-1e,58.0,True,43.0,3576.0,0.0,6715.0,49.0,Altark Susent,False,3
3,0003_02,Europa,False,A/0/S,TRAPPIST-1e,33.0,False,0.0,1283.0,371.0,3329.0,193.0,Solam Susent,False,3
4,0004_01,Earth,False,F/1/S,TRAPPIST-1e,16.0,False,303.0,70.0,151.0,565.0,2.0,Willy Santantines,True,4


In [None]:
#Adding a Position column (taken from PassengerId) to both the train and test data
for frame in (train, test):
  frame["Position"] = frame["PassengerId"].apply(getPos)
train.head()

Unnamed: 0,PassengerId,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Name,Transported,Group,Position
0,0001_01,Europa,False,B/0/P,TRAPPIST-1e,39.0,False,0.0,0.0,0.0,0.0,0.0,Maham Ofracculy,False,1,1
1,0002_01,Earth,False,F/0/S,TRAPPIST-1e,24.0,False,109.0,9.0,25.0,549.0,44.0,Juanna Vines,True,2,1
2,0003_01,Europa,False,A/0/S,TRAPPIST-1e,58.0,True,43.0,3576.0,0.0,6715.0,49.0,Altark Susent,False,3,1
3,0003_02,Europa,False,A/0/S,TRAPPIST-1e,33.0,False,0.0,1283.0,371.0,3329.0,193.0,Solam Susent,False,3,2
4,0004_01,Earth,False,F/1/S,TRAPPIST-1e,16.0,False,303.0,70.0,151.0,565.0,2.0,Willy Santantines,True,4,1


In [None]:
#Removing PassengerId from train and test (Group and Position now carry its information)
train = train.drop(columns=["PassengerId"])
test = test.drop(columns=["PassengerId"])
#Ensuring it works so far
train.head()

Unnamed: 0,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Name,Transported,Group,Position
0,Europa,False,B/0/P,TRAPPIST-1e,39.0,False,0.0,0.0,0.0,0.0,0.0,Maham Ofracculy,False,1,1
1,Earth,False,F/0/S,TRAPPIST-1e,24.0,False,109.0,9.0,25.0,549.0,44.0,Juanna Vines,True,2,1
2,Europa,False,A/0/S,TRAPPIST-1e,58.0,True,43.0,3576.0,0.0,6715.0,49.0,Altark Susent,False,3,1
3,Europa,False,A/0/S,TRAPPIST-1e,33.0,False,0.0,1283.0,371.0,3329.0,193.0,Solam Susent,False,3,2
4,Earth,False,F/1/S,TRAPPIST-1e,16.0,False,303.0,70.0,151.0,565.0,2.0,Willy Santantines,True,4,1


### HomePlanet
For classification **all** of our data should be numeric. I need to make HomePlanet a numeric variable. HomePlanet is a categorical variable, meaning it should be One-Hot encoded, which is a technique that we use to represent categorical variables as numerical values.

In [None]:
#One-Hot encoding (not the best way) --> Generally do it at the end of data cleaning
#Each HomePlanet value becomes its own 0/1 indicator column
planets = ["Earth", "Europa", "Mars"]
for frame in (train, test):
  #Build the dummies once per data set. dtype=int keeps the indicators as 0/1 on every
  #pandas version, and reindex guarantees all three columns exist even if a planet
  #never appears in one of the data sets (it is then filled with 0).
  planet_dummies = pd.get_dummies(frame["HomePlanet"], dtype=int).reindex(columns=planets, fill_value=0)
  for planet in planets:
    frame[planet] = planet_dummies[planet]
#Ensuring it works
train.head()

Unnamed: 0,HomePlanet,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Name,Transported,Group,Position,Earth,Europa,Mars
0,Europa,False,B/0/P,TRAPPIST-1e,39.0,False,0.0,0.0,0.0,0.0,0.0,Maham Ofracculy,False,1,1,0,1,0
1,Earth,False,F/0/S,TRAPPIST-1e,24.0,False,109.0,9.0,25.0,549.0,44.0,Juanna Vines,True,2,1,1,0,0
2,Europa,False,A/0/S,TRAPPIST-1e,58.0,True,43.0,3576.0,0.0,6715.0,49.0,Altark Susent,False,3,1,0,1,0
3,Europa,False,A/0/S,TRAPPIST-1e,33.0,False,0.0,1283.0,371.0,3329.0,193.0,Solam Susent,False,3,2,0,1,0
4,Earth,False,F/1/S,TRAPPIST-1e,16.0,False,303.0,70.0,151.0,565.0,2.0,Willy Santantines,True,4,1,1,0,0


In [None]:
#Removing HomePlanet now that it is represented by the Earth/Europa/Mars columns
train = train.drop(columns=["HomePlanet"])
test = test.drop(columns=["HomePlanet"])
train.head()

Unnamed: 0,CryoSleep,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Name,Transported,Group,Position,Earth,Europa,Mars
0,False,B/0/P,TRAPPIST-1e,39.0,False,0.0,0.0,0.0,0.0,0.0,Maham Ofracculy,False,1,1,0,1,0
1,False,F/0/S,TRAPPIST-1e,24.0,False,109.0,9.0,25.0,549.0,44.0,Juanna Vines,True,2,1,1,0,0
2,False,A/0/S,TRAPPIST-1e,58.0,True,43.0,3576.0,0.0,6715.0,49.0,Altark Susent,False,3,1,0,1,0
3,False,A/0/S,TRAPPIST-1e,33.0,False,0.0,1283.0,371.0,3329.0,193.0,Solam Susent,False,3,2,0,1,0
4,False,F/1/S,TRAPPIST-1e,16.0,False,303.0,70.0,151.0,565.0,2.0,Willy Santantines,True,4,1,1,0,0


### Cryo Sleep
CryoSleep is a boolean variable, so it should be converted to numeric (0 and 1).

In [None]:
def convert(b):
  """Convert a boolean flag to an integer: 1 for a true value, 0 otherwise.

  Missing values (NaN / None) are returned as 0. This needs an explicit check
  because NaN is truthy in Python, so a plain truth test would silently turn
  every missing entry into 1.
  """
  #NOTE(review): 0 is used as the fill value for missing flags -- confirm this
  #is the desired imputation for every column this helper is applied to
  if pd.isna(b):
    return 0
  return 1 if b else 0

In [None]:
#Converting CryoSleep to an integer column in both data sets
for frame in (train, test):
  frame["CryoSleep"] = frame["CryoSleep"].apply(convert)

### Cabin
Cabin takes the form deck/num/side (e.g. `B/0/P`). We will extract these values!

In [None]:
def deck(s):
  """Return the deck (the text before the first "/") of a deck/num/side cabin.

  199 values in Cabin are NaN. For those (and any other non-string value)
  the most common deck, "F", is returned.
  """
  if not isinstance(s, str):
    #Missing cabins arrive as NaN, which is a float and has no string methods
    return "F"
  return s[:s.find("/")]

def num(s):
  """Return the number (the text between the first and last "/") of a deck/num/side cabin.

  The value is returned as a string. 199 values in Cabin are NaN. For those
  (and any other non-string value) the fallback "1" is returned, which the
  original author noted as the most common number.
  """
  if not isinstance(s, str):
    #Missing cabins arrive as NaN, which is a float and has no string methods
    return "1"
  return s[s.find("/") + 1:s.rfind("/")]

def side(s):
  """Return the side (the text after the last "/") of a deck/num/side cabin.

  199 values in Cabin are NaN. For those (and any other non-string value)
  "P" or "S" is picked with a 50/50 chance, since the two sides are roughly
  equally common. The pick uses the global random module state, so seed it
  beforehand if reproducible output is needed.
  """
  if not isinstance(s, str):
    #Missing cabins arrive as NaN, which is a float and has no string methods
    return "P" if random.random() < 0.5 else "S"
  return s[s.rfind("/") + 1:]

In [None]:
#Doing it to training Data
#side() fills missing cabins at random, so the generator is seeded first;
#re-running the notebook then produces the same Side values every time
random.seed(42)
train["Deck"] = train["Cabin"].apply(deck)
train["Number"] = train["Cabin"].apply(num)
train["Side"] = train["Cabin"].apply(side)

In [None]:
#Doing it to testing data
#side() fills missing cabins at random, so the generator is seeded first;
#re-running this cell then produces the same Side values every time
random.seed(43)
test["Deck"] = test["Cabin"].apply(deck)
test["Number"] = test["Cabin"].apply(num)
test["Side"] = test["Cabin"].apply(side)

In [None]:
#Removing Cabin now that Deck, Number and Side have been extracted from it
train = train.drop(columns=["Cabin"])
test = test.drop(columns=["Cabin"])
train.head()

Unnamed: 0,CryoSleep,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Name,Transported,Group,Position,Earth,Europa,Mars,Deck,Number,Side
0,0,TRAPPIST-1e,39.0,False,0.0,0.0,0.0,0.0,0.0,Maham Ofracculy,False,1,1,0,1,0,B,0,P
1,0,TRAPPIST-1e,24.0,False,109.0,9.0,25.0,549.0,44.0,Juanna Vines,True,2,1,1,0,0,F,0,S
2,0,TRAPPIST-1e,58.0,True,43.0,3576.0,0.0,6715.0,49.0,Altark Susent,False,3,1,0,1,0,A,0,S
3,0,TRAPPIST-1e,33.0,False,0.0,1283.0,371.0,3329.0,193.0,Solam Susent,False,3,2,0,1,0,A,0,S
4,0,TRAPPIST-1e,16.0,False,303.0,70.0,151.0,565.0,2.0,Willy Santantines,True,4,1,1,0,0,F,1,S


### Destination
Destination will be treated the same as HomePlanet (see above)

In [None]:
#One Hot Encoding
#Maps each Destination value to the (shorter) name of its 0/1 indicator column
destinations = {"55 Cancri e": "Cancri", "PSO J318.5-22": "PSO", "TRAPPIST-1e": "Trappist"}
for frame in (train, test):
  #Build the dummies once per data set. dtype=int keeps the indicators as 0/1 on every
  #pandas version, and reindex guarantees all three columns exist even if a destination
  #never appears in one of the data sets (it is then filled with 0).
  destination_dummies = pd.get_dummies(frame["Destination"], dtype=int).reindex(columns=list(destinations), fill_value=0)
  for destination, column in destinations.items():
    frame[column] = destination_dummies[destination]
#Ensuring it works
train.head()

Unnamed: 0,CryoSleep,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Name,...,Position,Earth,Europa,Mars,Deck,Number,Side,Cancri,PSO,Trappist
0,0,TRAPPIST-1e,39.0,False,0.0,0.0,0.0,0.0,0.0,Maham Ofracculy,...,1,0,1,0,B,0,P,0,0,1
1,0,TRAPPIST-1e,24.0,False,109.0,9.0,25.0,549.0,44.0,Juanna Vines,...,1,1,0,0,F,0,S,0,0,1
2,0,TRAPPIST-1e,58.0,True,43.0,3576.0,0.0,6715.0,49.0,Altark Susent,...,1,0,1,0,A,0,S,0,0,1
3,0,TRAPPIST-1e,33.0,False,0.0,1283.0,371.0,3329.0,193.0,Solam Susent,...,2,0,1,0,A,0,S,0,0,1
4,0,TRAPPIST-1e,16.0,False,303.0,70.0,151.0,565.0,2.0,Willy Santantines,...,1,1,0,0,F,1,S,0,0,1


In [None]:
#Removing Destination now that it is represented by the Cancri/PSO/Trappist columns
train = train.drop(columns=["Destination"])
test = test.drop(columns=["Destination"])
train.head()

Unnamed: 0,CryoSleep,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Name,Transported,...,Position,Earth,Europa,Mars,Deck,Number,Side,Cancri,PSO,Trappist
0,0,39.0,False,0.0,0.0,0.0,0.0,0.0,Maham Ofracculy,False,...,1,0,1,0,B,0,P,0,0,1
1,0,24.0,False,109.0,9.0,25.0,549.0,44.0,Juanna Vines,True,...,1,1,0,0,F,0,S,0,0,1
2,0,58.0,True,43.0,3576.0,0.0,6715.0,49.0,Altark Susent,False,...,1,0,1,0,A,0,S,0,0,1
3,0,33.0,False,0.0,1283.0,371.0,3329.0,193.0,Solam Susent,False,...,2,0,1,0,A,0,S,0,0,1
4,0,16.0,False,303.0,70.0,151.0,565.0,2.0,Willy Santantines,True,...,1,1,0,0,F,1,S,0,0,1


### VIP
Same as CryoSleep (see above)

In [None]:
#Converting boolean to integer
for frame in (train, test):
  frame["VIP"] = frame["VIP"].apply(convert)
#Ensuring it worked
train.head()

Unnamed: 0,CryoSleep,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Name,Transported,...,Position,Earth,Europa,Mars,Deck,Number,Side,Cancri,PSO,Trappist
0,0,39.0,0,0.0,0.0,0.0,0.0,0.0,Maham Ofracculy,False,...,1,0,1,0,B,0,P,0,0,1
1,0,24.0,0,109.0,9.0,25.0,549.0,44.0,Juanna Vines,True,...,1,1,0,0,F,0,S,0,0,1
2,0,58.0,1,43.0,3576.0,0.0,6715.0,49.0,Altark Susent,False,...,1,0,1,0,A,0,S,0,0,1
3,0,33.0,0,0.0,1283.0,371.0,3329.0,193.0,Solam Susent,False,...,2,0,1,0,A,0,S,0,0,1
4,0,16.0,0,303.0,70.0,151.0,565.0,2.0,Willy Santantines,True,...,1,1,0,0,F,1,S,0,0,1


### Name
Name provides no information needed for modeling, so it should be removed.

In [None]:
#Removing the Name column from both data sets
train = train.drop(columns=["Name"])
test = test.drop(columns=["Name"])
#Ensuring it worked
train.head()

Unnamed: 0,CryoSleep,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Transported,Group,Position,Earth,Europa,Mars,Deck,Number,Side,Cancri,PSO,Trappist
0,0,39.0,0,0.0,0.0,0.0,0.0,0.0,False,1,1,0,1,0,B,0,P,0,0,1
1,0,24.0,0,109.0,9.0,25.0,549.0,44.0,True,2,1,1,0,0,F,0,S,0,0,1
2,0,58.0,1,43.0,3576.0,0.0,6715.0,49.0,False,3,1,0,1,0,A,0,S,0,0,1
3,0,33.0,0,0.0,1283.0,371.0,3329.0,193.0,False,3,2,0,1,0,A,0,S,0,0,1
4,0,16.0,0,303.0,70.0,151.0,565.0,2.0,True,4,1,1,0,0,F,1,S,0,0,1


### Finalizing the Cleaning of Data

In [None]:
#Group, Position and Number were extracted as text, so they are cast to numeric here
for column in ("Group", "Position", "Number"):
  train[column] = pd.to_numeric(train[column])
  test[column] = pd.to_numeric(test[column])

#Ensuring it worked
train.head()

Unnamed: 0,CryoSleep,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Transported,Group,Position,Earth,Europa,Mars,Deck,Number,Side,Cancri,PSO,Trappist
0,0,39.0,0,0.0,0.0,0.0,0.0,0.0,False,1,1,0,1,0,B,0,P,0,0,1
1,0,24.0,0,109.0,9.0,25.0,549.0,44.0,True,2,1,1,0,0,F,0,S,0,0,1
2,0,58.0,1,43.0,3576.0,0.0,6715.0,49.0,False,3,1,0,1,0,A,0,S,0,0,1
3,0,33.0,0,0.0,1283.0,371.0,3329.0,193.0,False,3,2,0,1,0,A,0,S,0,0,1
4,0,16.0,0,303.0,70.0,151.0,565.0,2.0,True,4,1,1,0,0,F,1,S,0,0,1


In [None]:
#Deck should be Ordinal Encoded (it appears there is a general order)
#The lookup is called deck_order rather than map so Python's built-in map() is not shadowed
deck_order = {"A": 0, "B": 1, "C":2, "D": 3, "E": 4, "F": 5, "G": 6, "T": 7}
#Any deck letter missing from deck_order becomes NaN
train["Deck"] = train["Deck"].map(deck_order)
test["Deck"] = test["Deck"].map(deck_order)

In [None]:
#Side has two values, so they are encoded as 0 and 1
def convertSide(s):
  """Encode a cabin side as an integer: "P" becomes 0, anything else becomes 1."""
  return 0 if s == "P" else 1

#Encoding Side as 0/1 in both data sets
for frame in (train, test):
  frame["Side"] = frame["Side"].apply(convertSide)
train.head()

Unnamed: 0,CryoSleep,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Transported,Group,Position,Earth,Europa,Mars,Deck,Number,Side,Cancri,PSO,Trappist
0,0,39.0,0,0.0,0.0,0.0,0.0,0.0,False,1,1,0,1,0,1,0,0,0,0,1
1,0,24.0,0,109.0,9.0,25.0,549.0,44.0,True,2,1,1,0,0,5,0,1,0,0,1
2,0,58.0,1,43.0,3576.0,0.0,6715.0,49.0,False,3,1,0,1,0,0,0,1,0,0,1
3,0,33.0,0,0.0,1283.0,371.0,3329.0,193.0,False,3,2,0,1,0,0,0,1,0,0,1
4,0,16.0,0,303.0,70.0,151.0,565.0,2.0,True,4,1,1,0,0,5,1,1,0,0,1


## Feature Engineering

In [None]:
#Looking at the cleaned training data before adding new features
train.head()

Unnamed: 0,CryoSleep,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Transported,Group,Position,Earth,Europa,Mars,Deck,Number,Side,Cancri,PSO,Trappist
0,0,39.0,0,0.0,0.0,0.0,0.0,0.0,False,1,1,0,1,0,1,0,0,0,0,1
1,0,24.0,0,109.0,9.0,25.0,549.0,44.0,True,2,1,1,0,0,5,0,1,0,0,1
2,0,58.0,1,43.0,3576.0,0.0,6715.0,49.0,False,3,1,0,1,0,0,0,1,0,0,1
3,0,33.0,0,0.0,1283.0,371.0,3329.0,193.0,False,3,2,0,1,0,0,0,1,0,0,1
4,0,16.0,0,303.0,70.0,151.0,565.0,2.0,True,4,1,1,0,0,5,1,1,0,0,1


### Total
I made a variable called `Total`, which represents how much each passenger spent on the spaceship (the sum of RoomService, FoodCourt, ShoppingMall, Spa, and VRDeck).

In [None]:
#Total is the sum of the five spending columns.
#Looping over the frames makes each data set sum only its own columns, so
#values from one data set can never be added into the other by row index.
for frame in (train, test):
  frame["Total"] = (
    frame["RoomService"] + frame["FoodCourt"] + frame["ShoppingMall"]
    + frame["Spa"] + frame["VRDeck"]
  )
train.head()

Unnamed: 0,CryoSleep,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Transported,Group,...,Earth,Europa,Mars,Deck,Number,Side,Cancri,PSO,Trappist,Total
0,0,39.0,0,0.0,0.0,0.0,0.0,0.0,False,1,...,0,1,0,1,0,0,0,0,1,0.0
1,0,24.0,0,109.0,9.0,25.0,549.0,44.0,True,2,...,1,0,0,5,0,1,0,0,1,736.0
2,0,58.0,1,43.0,3576.0,0.0,6715.0,49.0,False,3,...,0,1,0,0,0,1,0,0,1,10383.0
3,0,33.0,0,0.0,1283.0,371.0,3329.0,193.0,False,3,...,0,1,0,0,0,1,0,0,1,5176.0
4,0,16.0,0,303.0,70.0,151.0,565.0,2.0,True,4,...,1,0,0,5,1,1,0,0,1,1091.0


## Normalizing Data

In [None]:
#Setting the target column aside so it is not normalized along with the features
transported = train["Transported"]
train = train.drop(columns=["Transported"])

In [None]:
#I am going to Z-Score normalize everything
#The mean and standard deviation are computed from the training data only and are
#reused for the test data, so a given raw value maps to the same normalized value
#in both data sets (scaling test with its own statistics would shift its features
#relative to what a model saw during training).
feature_mean = train.mean()
feature_std = train.std()
train = (train - feature_mean) / feature_std
test = (test - feature_mean) / feature_std

In [None]:
#Putting the (un-normalized) target back as the last column
train = train.assign(Transported=transported)
train.head()

Unnamed: 0,CryoSleep,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Group,Position,...,Europa,Mars,Deck,Number,Side,Cancri,PSO,Trappist,Total,Transported
0,-0.773436,0.702054,-0.220183,-0.337006,-0.284257,-0.2873,-0.27372,-0.266082,-1.734309,-0.491133,...,1.754694,-0.503635,-1.886213,-1.141661,-1.007447,-0.510984,-0.317468,0.685273,-0.521775,False
1,-0.773436,-0.333213,-0.220183,-0.173518,-0.278672,-0.245957,0.209255,-0.227678,-1.733935,-0.491133,...,-0.569834,-0.503635,0.385448,-1.141661,0.992493,-0.510984,-0.317468,0.685273,-0.263102,True
2,-0.773436,2.013391,4.541146,-0.272511,1.934808,-0.2873,5.633703,-0.223314,-1.73356,-0.491133,...,1.754694,-0.503635,-2.454128,-1.141661,0.992493,-0.510984,-0.317468,0.685273,3.127415,False
3,-0.773436,0.287947,-0.220183,-0.337006,0.511901,0.326231,2.654919,-0.097629,-1.73356,0.457416,...,1.754694,-0.503635,-2.454128,-1.141661,0.992493,-0.510984,-0.317468,0.685273,1.297372,False
4,-0.773436,-0.885355,-0.220183,0.11746,-0.240819,-0.037588,0.223331,-0.264336,-1.733186,-0.491133,...,-0.569834,-0.503635,0.385448,-1.139715,0.992493,-0.510984,-0.317468,0.685273,-0.138335,True


## Saving Data

In [None]:
#Writing both cleaned data sets to CSV files in the working directory
#(the row index is written too, as the first column)
for path, frame in (("CleanedTrainingData.csv", train), ("CleanedTestingData.csv", test)):
  frame.to_csv(path)