## Data Conversion Notebook
This module can be useful to store methods of converting, transforming, and imputing values for datasets.

In [1]:
import pandas as pd

In [2]:
train = pd.read_csv("train.csv")

In [3]:
class NumericDataFrame():
    """
    This class converts categorical types of a dataframe into numeric types.
    It also automatically imputes missing values as the row mean.
    """
    def __init__(self, df):
        self.type_objs = df.select_dtypes(exclude=['float64',"int64"])
        self.type_numeric = df.select_dtypes(include=['float64',"int64"])
        self.transform_dict = self.get_transform_dict()
        
    def impute(self, df):
        df = df.apply(lambda x: x.fillna(x.mean()),axis=0)
        return df

    def get_transform_dict(self):
        transform_dict = {}
        for col in self.type_objs.columns:
            cats = pd.Categorical(self.type_objs[col]).categories
            d = {}
            for i, cat in enumerate(cats):
                d[cat] = i + 2
            transform_dict[col] = d
        return transform_dict

    def __call__(self):
        df = self.type_objs
        df.replace(self.transform_dict, inplace=True)
        df = pd.merge(self.type_numeric, df, left_index=True, right_index=True)
        df = self.impute(df)
        return df

In [4]:
# Example of calling instance of this class
train_numeric = NumericDataFrame(train)()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  regex=regex)
