# MPG Cars

### Introduction:

The following exercise utilizes data from [UC Irvine Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/Auto+MPG)

### Step 1. Import the necessary libraries

In [1]:
import pandas as pd
import numpy as np

### Step 2. Import the first dataset [cars1](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv) and [cars2](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv).  

   ### Step 3. Assign each to a variable called cars1 and cars2

In [2]:
cars1 = pd.read_csv("https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv")
cars2 = pd.read_csv("https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv")

print(cars1.head())
print(cars2.head())

    mpg  cylinders  displacement horsepower  weight  acceleration  model  \
0  18.0          8           307        130    3504          12.0     70   
1  15.0          8           350        165    3693          11.5     70   
2  18.0          8           318        150    3436          11.0     70   
3  16.0          8           304        150    3433          12.0     70   
4  17.0          8           302        140    3449          10.5     70   

   origin                        car  Unnamed: 9  Unnamed: 10  Unnamed: 11  \
0       1  chevrolet chevelle malibu         NaN          NaN          NaN   
1       1          buick skylark 320         NaN          NaN          NaN   
2       1         plymouth satellite         NaN          NaN          NaN   
3       1              amc rebel sst         NaN          NaN          NaN   
4       1                ford torino         NaN          NaN          NaN   

   Unnamed: 12  Unnamed: 13  
0          NaN          NaN  
1          NaN

### Step 4. Oops, it seems our first dataset has some unnamed blank columns, fix cars1

In [4]:
cars1=cars1.loc[:,'mpg':'car']
cars1.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino


### Step 5. What is the number of observations in each dataset?

In [5]:
print(cars1.shape)
print(cars2.shape)

(198, 9)
(200, 9)


### Step 6. Join cars1 and cars2 into a single DataFrame called cars

In [8]:
cars = pd.concat([cars1, cars2], ignore_index=True)  # ignore_index=True 会重置索引（可选）
## .append() 已经废弃
cars

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino
...,...,...,...,...,...,...,...,...,...
393,27.0,4,140,86,2790,15.6,82,1,ford mustang gl
394,44.0,4,97,52,2130,24.6,82,2,vw pickup
395,32.0,4,135,84,2295,11.6,82,1,dodge rampage
396,28.0,4,120,79,2625,18.6,82,1,ford ranger


### Step 7. Oops, there is a column missing, called owners. Create a random number Series from 15,000 to 73,000.

In [11]:
nr_owners = np.random.randint(15000, high=73001, size=398, dtype=np.int64)
nr_owners

array([71865, 41030, 18825, 57987, 50251, 32903, 60802, 47101, 29106,
       53332, 57497, 42399, 44685, 18622, 41654, 22273, 55930, 32992,
       60792, 61132, 27194, 16595, 47582, 40498, 39911, 43534, 30684,
       25562, 60339, 34653, 32672, 63810, 63473, 37219, 24967, 37489,
       70753, 64620, 45527, 20944, 47001, 20176, 70554, 55768, 60261,
       72980, 28017, 40709, 15014, 55342, 23694, 71811, 65119, 35994,
       30991, 61800, 45240, 22919, 31531, 19192, 29095, 35151, 35315,
       68494, 50595, 39901, 43119, 47618, 53267, 21459, 65853, 72457,
       50960, 45758, 65103, 23147, 32203, 61200, 41406, 38305, 67968,
       37388, 38380, 70424, 31178, 65254, 41591, 32624, 41547, 29847,
       55155, 56183, 26153, 27896, 33412, 62877, 25740, 30481, 46018,
       59657, 19881, 32804, 29937, 38089, 71303, 33619, 45164, 51666,
       67242, 63819, 27463, 55049, 53105, 20077, 69287, 27606, 16110,
       15705, 40789, 38338, 27643, 55378, 39325, 52072, 24929, 60244,
       50056, 57957,

### Step 8. Add the column owners to cars

In [10]:
cars['owners'] = nr_owners
cars.tail()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car,owners
393,27.0,4,140,86,2790,15.6,82,1,ford mustang gl,56795
394,44.0,4,97,52,2130,24.6,82,2,vw pickup,65726
395,32.0,4,135,84,2295,11.6,82,1,dodge rampage,56416
396,28.0,4,120,79,2625,18.6,82,1,ford ranger,15504
397,31.0,4,119,82,2720,19.4,82,1,chevy s-10,32190
