# MPG Cars

### Introduction:

The following exercise uses data from the [UC Irvine Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/Auto+MPG).

### Step 1. Import the necessary libraries

In [1]:
import pandas as pd
import numpy as np

### Step 2. Import the two datasets, [cars1](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv) and [cars2](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv).

### Step 3. Assign them to variables called cars1 and cars2

In [3]:
# Locations of the two halves of the Auto MPG data set.
url1 = 'https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv'
url2 = 'https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv'

# Read both CSV files with pandas' default parsing options.
cars1, cars2 = map(pd.read_csv, (url1, url2))

# Plain-text preview of the first five rows of each frame, one after the other.
print(cars1.head(), cars2.head(), sep='\n')

    mpg  cylinders  displacement horsepower  weight  acceleration  model  \
0  18.0          8           307        130    3504          12.0     70   
1  15.0          8           350        165    3693          11.5     70   
2  18.0          8           318        150    3436          11.0     70   
3  16.0          8           304        150    3433          12.0     70   
4  17.0          8           302        140    3449          10.5     70   

   origin                        car  Unnamed: 9  Unnamed: 10  Unnamed: 11  \
0       1  chevrolet chevelle malibu         NaN          NaN          NaN   
1       1          buick skylark 320         NaN          NaN          NaN   
2       1         plymouth satellite         NaN          NaN          NaN   
3       1              amc rebel sst         NaN          NaN          NaN   
4       1                ford torino         NaN          NaN          NaN   

   Unnamed: 12  Unnamed: 13  
0          NaN          NaN  
1          NaN

### Step 4. Oops, it seems our first dataset has some unnamed blank columns. Fix cars1.

In [7]:
# Label-based column slice: everything from 'mpg' through 'car' (both inclusive).
# This keeps the named data columns and discards the trailing "Unnamed: N"
# columns that read_csv produced for cars1.
named_columns = slice('mpg', 'car')
cars1 = cars1.loc[:, named_columns]
cars1.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino


### Step 5. What is the number of observations in each dataset?

In [9]:
# Number of rows (observations) in each frame, printed one per line.
print(len(cars1), len(cars2), sep='\n')

198
200


### Step 6. Join cars1 and cars2 into a single DataFrame called cars

In [10]:
# Stack cars2 underneath cars1 to form a single frame.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, so
# pd.concat is used instead; it produces the same row-wise concatenation.
# As with append, each frame keeps its own row labels, so labels from cars1 and
# cars2 overlap in the result (pass ignore_index=True if a fresh 0..n-1 index
# is wanted).
cars = pd.concat([cars1, cars2])
cars

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino
...,...,...,...,...,...,...,...,...,...
195,27.0,4,140,86,2790,15.6,82,1,ford mustang gl
196,44.0,4,97,52,2130,24.6,82,2,vw pickup
197,32.0,4,135,84,2295,11.6,82,1,dodge rampage
198,28.0,4,120,79,2625,18.6,82,1,ford ranger


### Step 7. Oops, there is a column missing, called owners. Create a series of random integers from 15,000 to 73,000, one for each row of cars.

In [11]:
# Draw one random owner count per row of `cars`. The size is taken from the
# frame itself rather than a hard-coded 398, so the array always has the right
# length for the column assignment that follows.
# `high` is exclusive in np.random.randint, hence 73001 so that 73,000 can occur.
# No seed is set, so the values differ on every run.
nr_owners = np.random.randint(15000, high=73001, size=len(cars), dtype='l')
nr_owners

array([50877, 30093, 39155, 28124, 17307, 50948, 24496, 30355, 46199,
       43085, 15524, 51374, 29217, 49369, 35094, 63003, 47037, 19564,
       25020, 15724, 21680, 21185, 54666, 71166, 33017, 61887, 49362,
       54699, 26181, 37598, 47075, 55255, 64706, 52196, 72472, 46090,
       54241, 34823, 31801, 19891, 69968, 22887, 62502, 30452, 62088,
       37135, 38515, 27170, 65794, 67727, 37963, 48336, 57812, 34003,
       23033, 43652, 16024, 66220, 54980, 58305, 59828, 32658, 57883,
       32354, 56024, 33389, 56823, 67773, 43908, 22059, 28337, 67808,
       18168, 41568, 45821, 52153, 26494, 38473, 19107, 70910, 66366,
       18210, 21544, 67441, 41255, 49996, 36601, 69116, 28596, 45729,
       21724, 23325, 40636, 53332, 64167, 16600, 20041, 23839, 61349,
       31216, 18233, 28258, 37287, 62696, 63283, 62483, 28951, 17304,
       56246, 42973, 57653, 48119, 42624, 57191, 25744, 47147, 36083,
       20500, 37068, 62941, 17434, 21083, 20340, 35928, 59528, 19081,
       15247, 49672,

### Step 8. Add the column owners to cars

In [12]:
# Attach the random owner counts to `cars` as a column named 'owners'.
# This modifies `cars` in place. nr_owners comes from np.random.randint, i.e. a
# NumPy array, so its length has to equal the number of rows in `cars`.
cars['owners'] = nr_owners
# Display the updated frame as the cell's output.
cars

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car,owners
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu,50877
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320,30093
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite,39155
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst,28124
4,17.0,8,302,140,3449,10.5,70,1,ford torino,17307
...,...,...,...,...,...,...,...,...,...,...
195,27.0,4,140,86,2790,15.6,82,1,ford mustang gl,64487
196,44.0,4,97,52,2130,24.6,82,2,vw pickup,15589
197,32.0,4,135,84,2295,11.6,82,1,dodge rampage,33822
198,28.0,4,120,79,2625,18.6,82,1,ford ranger,48342
