# MPG Cars

### Introduction:

The following exercise utilizes data from [UC Irvine Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/Auto+MPG)

### Step 1. Import the necessary libraries

In [2]:
import pandas as pd
import numpy as np

### Step 2. Import the first dataset [cars1](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv) and [cars2](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv).  

   ### Step 3. Assign each to a variable called cars1 and cars2

In [6]:
cars1 = pd.read_csv('cars1.csv')
cars2 = pd.read_csv('cars2.csv')
print(cars1.head())
print(cars2.head())

    mpg  cylinders  displacement horsepower  weight  acceleration  model  \
0  18.0          8           307        130    3504          12.0     70   
1  15.0          8           350        165    3693          11.5     70   
2  18.0          8           318        150    3436          11.0     70   
3  16.0          8           304        150    3433          12.0     70   
4  17.0          8           302        140    3449          10.5     70   

   origin                        car  Unnamed: 9  Unnamed: 10  Unnamed: 11  \
0       1  chevrolet chevelle malibu         NaN          NaN          NaN   
1       1          buick skylark 320         NaN          NaN          NaN   
2       1         plymouth satellite         NaN          NaN          NaN   
3       1              amc rebel sst         NaN          NaN          NaN   
4       1                ford torino         NaN          NaN          NaN   

   Unnamed: 12  Unnamed: 13  
0          NaN          NaN  
1          NaN

### Step 4. Oops, it seems our first dataset has some unnamed blank columns, fix cars1

In [7]:
#slice to remove unwanted columns
cars1 = cars1.loc[:,'mpg':'car']
cars1

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino
...,...,...,...,...,...,...,...,...,...
193,24.0,6,200,81,3012,17.6,76,1,ford maverick
194,22.5,6,232,90,3085,17.6,76,1,amc hornet
195,29.0,4,85,52,2035,22.2,76,1,chevrolet chevette
196,24.5,4,98,60,2164,22.1,76,1,chevrolet woody


### Step 5. What is the number of observations in each dataset?

In [10]:
#len to see how many rows of data 
print(len(cars1))
print(len(cars2))

198
200


### Step 6. Join cars1 and cars2 into a single DataFrame called cars

In [13]:
#appending cars1 to cars2
cars = cars1.append(cars2)
cars

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino
...,...,...,...,...,...,...,...,...,...
195,27.0,4,140,86,2790,15.6,82,1,ford mustang gl
196,44.0,4,97,52,2130,24.6,82,2,vw pickup
197,32.0,4,135,84,2295,11.6,82,1,dodge rampage
198,28.0,4,120,79,2625,18.6,82,1,ford ranger


### Step 7. Oops, there is a column missing, called owners. Create a random number Series from 15,000 to 73,000.

In [17]:
#creating random data 
owners = np.random.randint(low=15000, high=73001, size=398, dtype='l')
owners

array([45825, 26059, 67809, 67727, 33351, 68935, 26660, 16574, 16201,
       18821, 67946, 16037, 36822, 66757, 70987, 54735, 30344, 16408,
       58727, 65630, 33869, 39920, 46361, 61572, 19554, 49236, 24404,
       44538, 58396, 29773, 27354, 35082, 61380, 26838, 23363, 69754,
       30343, 65556, 43339, 45752, 42983, 67343, 49953, 61981, 71950,
       72704, 17816, 61972, 59334, 26375, 56886, 39600, 36874, 41912,
       16747, 34665, 64674, 46205, 38818, 32081, 36135, 62765, 66249,
       46395, 57000, 37470, 65499, 31127, 46753, 44202, 39855, 45504,
       53979, 35744, 57101, 36907, 21289, 60747, 38576, 70636, 33594,
       61472, 61096, 48047, 35352, 15935, 68890, 48446, 53826, 38186,
       65578, 40052, 56218, 40724, 56258, 68696, 43631, 55988, 38970,
       38420, 42069, 66190, 16610, 58504, 54200, 63640, 39840, 57145,
       16620, 31190, 61074, 23926, 58464, 64206, 27041, 44119, 57783,
       64424, 40925, 71420, 47026, 38073, 34175, 55305, 63441, 42354,
       38871, 42356,

### Step 8. Add the column owners to cars

In [18]:
#creating owners column with owners random data
cars['owners'] = owners
cars

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car,owners
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu,45825
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320,26059
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite,67809
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst,67727
4,17.0,8,302,140,3449,10.5,70,1,ford torino,33351
...,...,...,...,...,...,...,...,...,...,...
195,27.0,4,140,86,2790,15.6,82,1,ford mustang gl,69381
196,44.0,4,97,52,2130,24.6,82,2,vw pickup,19806
197,32.0,4,135,84,2295,11.6,82,1,dodge rampage,57152
198,28.0,4,120,79,2625,18.6,82,1,ford ranger,17987
