In [1]:
import pandas as pd
import numpy as np 

import matplotlib.pyplot as plt
%config InlineBackend.figure_format = 'retina'

In [2]:
# Load the raw file: tab-separated with no header row, so pandas labels the
# resulting columns 0 and 1.
# read_csv with an explicit tab separator parses the file exactly like
# read_table, but avoids the deprecation warning that pandas 0.24 emits for
# read_table.
raw = pd.read_csv('./auto-mpg.data', sep='\t', header=None)

  """Entry point for launching an IPython kernel.


In [3]:
# Sanity check on the load: (number of rows, number of columns) of the raw frame.
raw.shape

(398, 2)

In [4]:
# Peek at three randomly chosen rows (no seed is set, so the rows differ between runs).
raw.sample(3)

Unnamed: 0,0,1
260,18.6 6 225.0 110.0 3620. 18...,dodge aspen
53,31.0 4 71.00 65.00 1773. 19...,toyota corolla 1200
350,34.7 4 105.0 63.00 2215. 14...,plymouth horizon 4


<span style='color:purple'> Set up the eight target columns that the values parsed from raw[0] will populate </span>

In [5]:
# Zero-filled placeholder frame: one row per raw record, one column per numeric field.
variables = pd.DataFrame(
    data=np.zeros((len(raw), 8)),
    columns=[
        'mpg',
        'cylinders',
        'displacement',
        'horsepower',
        'weight',
        'acceleration',
        'model_year',
        'origin',
    ],
)
variables.head(3)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


<span style='color:purple'> Create a list of 8 elements from each row </span>

In [6]:
def make_columns(x):
    """
    Split one entry of raw[0] into its individual field tokens.

    Parameters
    ----------
    x : str
        A whitespace-separated record, e.g. '18.0   8   307.0 ...'.

    Returns
    -------
    list of str
        The non-empty tokens in their original order; an empty list when
        x is empty or contains only whitespace.
    """
    # str.split() with no argument collapses runs of whitespace and drops
    # leading/trailing blanks, so no empty tokens are produced. It also treats
    # tabs and newlines as separators, which splitting on ' ' alone did not.
    return x.split()

In [7]:
# Suppress every warning for the remainder of the session.
# NOTE(review): this blanket filter also hides deprecation warnings from the
# libraries used below; consider narrowing it to a specific category, and
# moving the import up to the first cell alongside the other imports.
import warnings
warnings.filterwarnings('ignore')

In [8]:
raw['temp'] = raw[0].map(lambda x: make_columns(x))
raw.head(2)

Unnamed: 0,0,1,temp
0,18.0 8 307.0 130.0 3504. 12...,chevrolet chevelle malibu,"[18.0, 8, 307.0, 130.0, 3504., 12.0, 70, 1]"
1,15.0 8 350.0 165.0 3693. 11...,buick skylark 320,"[15.0, 8, 350.0, 165.0, 3693., 11.5, 70, 1]"


In [9]:
# make sure we actually have 8 entries in each list to avoid errors when populating values
# Prints the row label and the offending tokens for every row that did not
# split into exactly 8 fields; prints nothing when all rows are well-formed.
# (Each element of raw['temp'] is a plain list, so its `.index` attribute is the
# list.index method rather than a row position; the label must come from the Series.)
for row_label, fields in raw['temp'].items():
    if len(fields) != 8:
        print(row_label, fields)

<span style='color:purple'> We're good to go. This is where I populate the corresponding columns with the parsed values </span>

In [10]:
# Build the populated frame in one step from the per-row token lists instead of
# assigning string lists row by row into a float-typed frame, which relies on
# implicit, pandas-version-dependent coercion.
# The column names are taken from the placeholder frame and the index from raw,
# so the later column-wise concat with raw still lines up row for row.
variables = pd.DataFrame(raw['temp'].tolist(), index=raw.index, columns=variables.columns)

# Convert each column to a numeric dtype explicitly. A column containing a token
# that cannot be parsed as a number is left as text rather than silently altered.
for column in variables.columns:
    try:
        variables[column] = pd.to_numeric(variables[column])
    except (ValueError, TypeError):
        pass

In [11]:
# to confirm all match: show the last five token lists next to the last five
# rows of the populated frame so the values can be compared by eye
print(raw['temp'].tail())
variables.tail()

393    [27.0, 4, 140.0, 86.00, 2790., 15.6, 82, 1]
394    [44.0, 4, 97.00, 52.00, 2130., 24.6, 82, 2]
395    [32.0, 4, 135.0, 84.00, 2295., 11.6, 82, 1]
396    [28.0, 4, 120.0, 79.00, 2625., 18.6, 82, 1]
397    [31.0, 4, 119.0, 82.00, 2720., 19.4, 82, 1]
Name: temp, dtype: object


Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin
393,27.0,4,140.0,86.0,2790.0,15.6,82,1
394,44.0,4,97.0,52.0,2130.0,24.6,82,2
395,32.0,4,135.0,84.0,2295.0,11.6,82,1
396,28.0,4,120.0,79.0,2625.0,18.6,82,1
397,31.0,4,119.0,82.0,2720.0,19.4,82,1


<span style='color:purple'> Now combine what we have into a ready-to-use data frame </span>

In [18]:
# Put the parsed numeric columns next to the raw frame, discard the two helper
# columns (the token lists and the original packed string), and give the
# remaining raw column a descriptive name.
df = (
    pd.concat([variables, raw], axis=1, sort=False)
    .drop(columns=['temp', 0])
    .rename(columns={1: 'car_name'})
)
df.sample(3)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,car_name
77,22.0,4,121.0,76.0,2511.0,18.0,72,2,volkswagen 411 (sw)
334,23.7,3,70.0,100.0,2420.0,12.5,80,3,mazda rx-7 gs
85,13.0,8,350.0,175.0,4100.0,13.0,73,1,buick century 350


In [20]:
# saving the ready to use data frame
# The file is written relative to the notebook's working directory;
# index = False keeps the row index out of the CSV.
df.to_csv('./ready_dataframe.csv', index = False)