# MPG Cars

### Introduction:

The following exercise utilizes data from [UC Irvine Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/Auto+MPG)

### Step 1. Import the necessary libraries

In [11]:
import pandas as pd
import numpy as np

### Step 2. Import the first dataset [cars1](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv) and [cars2](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv).  

   ### Step 3. Assign each to a variable called cars1 and cars2

In [12]:
cars1 = pd.read_csv("https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv")
cars2 = pd.read_csv("https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv")
print(cars1.head())
print(cars2.head())

    mpg  cylinders  displacement horsepower  weight  acceleration  model  \
0  18.0          8           307        130    3504          12.0     70   
1  15.0          8           350        165    3693          11.5     70   
2  18.0          8           318        150    3436          11.0     70   
3  16.0          8           304        150    3433          12.0     70   
4  17.0          8           302        140    3449          10.5     70   

   origin                        car  Unnamed: 9  Unnamed: 10  Unnamed: 11  \
0       1  chevrolet chevelle malibu         NaN          NaN          NaN   
1       1          buick skylark 320         NaN          NaN          NaN   
2       1         plymouth satellite         NaN          NaN          NaN   
3       1              amc rebel sst         NaN          NaN          NaN   
4       1                ford torino         NaN          NaN          NaN   

   Unnamed: 12  Unnamed: 13  
0          NaN          NaN  
1          NaN

### Step 4. Oops, it seems our first dataset has some unnamed blank columns, fix cars1

In [13]:
cars1 = cars1.loc[:, "mpg":"car"]
cars1

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino
...,...,...,...,...,...,...,...,...,...
193,24.0,6,200,81,3012,17.6,76,1,ford maverick
194,22.5,6,232,90,3085,17.6,76,1,amc hornet
195,29.0,4,85,52,2035,22.2,76,1,chevrolet chevette
196,24.5,4,98,60,2164,22.1,76,1,chevrolet woody


### Step 5. What is the number of observations in each dataset?

In [14]:
print(cars1.shape)
print(cars2.shape)

(198, 9)
(200, 9)


### Step 6. Join cars1 and cars2 into a single DataFrame called cars

In [15]:
cars = pd.concat([cars1,cars2], ignore_index=True)
cars

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino
...,...,...,...,...,...,...,...,...,...
393,27.0,4,140,86,2790,15.6,82,1,ford mustang gl
394,44.0,4,97,52,2130,24.6,82,2,vw pickup
395,32.0,4,135,84,2295,11.6,82,1,dodge rampage
396,28.0,4,120,79,2625,18.6,82,1,ford ranger


### Step 7. Oops, there is a column missing, called owners. Create a random number Series from 15,000 to 73,000.

In [17]:
random_owner = np.random.randint(15000, high=73001, size= 398, dtype='l')
random_owner

array([30387, 56687, 54462, 19344, 59542, 24599, 66175, 20983, 22666,
       27861, 52170, 43284, 22690, 51517, 66880, 29861, 53894, 71001,
       29659, 28475, 26868, 19679, 19072, 54105, 53402, 59639, 48999,
       29735, 25225, 32249, 61466, 22643, 23808, 65592, 69983, 57778,
       47548, 52957, 47939, 52098, 42952, 62063, 25095, 41750, 51407,
       67827, 21785, 34231, 21449, 68029, 57192, 32843, 19399, 41658,
       19225, 53435, 27869, 65409, 46406, 65808, 17510, 16469, 17683,
       25291, 45315, 42414, 16619, 58847, 31087, 57179, 40631, 50741,
       20279, 26984, 22210, 18392, 35484, 43701, 51847, 71020, 68358,
       17122, 49556, 55108, 20503, 61951, 23287, 20969, 35310, 70911,
       24545, 59719, 42353, 62787, 71301, 26678, 69540, 61312, 55913,
       70358, 58251, 46329, 66471, 16995, 56816, 60212, 30737, 37585,
       22192, 54902, 64569, 17712, 38102, 63173, 60499, 68634, 37551,
       71105, 63862, 45573, 45146, 18754, 41452, 66780, 37070, 61071,
       55627, 69169,

### Step 8. Add the column owners to cars

In [19]:
cars["owners"] = random_owner
cars

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car,owners
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu,30387
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320,56687
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite,54462
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst,19344
4,17.0,8,302,140,3449,10.5,70,1,ford torino,59542
...,...,...,...,...,...,...,...,...,...,...
393,27.0,4,140,86,2790,15.6,82,1,ford mustang gl,40611
394,44.0,4,97,52,2130,24.6,82,2,vw pickup,64014
395,32.0,4,135,84,2295,11.6,82,1,dodge rampage,38887
396,28.0,4,120,79,2625,18.6,82,1,ford ranger,23585
