In [1]:
import pandas

## There are two main data types in pandas (series and data frames)

In [12]:
# A Series is pandas' single-column structure; here it is built from a plain
# Python list and receives a default integer index (0, 1, 2).
series = pandas.Series(
    data=['BMW', 'Toyota', 'Honda'],
)

In [3]:
# show the Series: index labels on the left, values on the right
series

0       BMW
1    Toyota
2     Honda
dtype: object

In [4]:
# A second Series, one colour per car, in the same order as the car makes.
colors = pandas.Series(
    data=['Red', 'Blue', 'White'],
)

In [5]:
# show the second Series; it has the same default 0..2 index as `series`
colors

0      Red
1     Blue
2    White
dtype: object

In [11]:
# A DataFrame is the multi-column (tabular) structure and is used far more
# often than a bare Series.
# It can be built from a dictionary that maps each column name to its values.
# Those values may themselves be Series, which is how the two Series defined
# earlier are combined into a single table here.

car_data = pandas.DataFrame(
    data={'Car make': series, 'Colors': colors},
)

In [7]:
# show the combined frame: one column per dictionary key, rows matched up by index
car_data

Unnamed: 0,Car make,Colors
0,BMW,Red
1,Toyota,Blue
2,Honda,White


<img src="pandas-anatomy-of-a-dataframe.png" width="700" />

## Importing and exporting data

In [31]:
# import data: read_csv parses a CSV file into a DataFrame
# ('car-sales.csv' is a relative path, resolved against the notebook's working directory)
car_sales = pandas.read_csv('car-sales.csv')

# read_csv also accepts a URL in place of a local path, e.g.:
# pandas.read_csv("https://raw.githubusercontent.com/mrdbourke/zero-to-mastery-ml/master/data/heart-disease.csv")

In [10]:
# show the imported frame (only 10 rows, so displaying it whole is fine)
car_sales

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
1,Honda,Red,87899,4,"$5,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
3,BMW,Black,11179,5,"$22,000.00"
4,Nissan,White,213095,4,"$3,500.00"
5,Toyota,Green,99213,4,"$4,500.00"
6,Honda,Blue,45698,4,"$7,500.00"
7,Honda,Blue,54738,4,"$7,000.00"
8,Toyota,White,60000,4,"$6,250.00"
9,Nissan,White,31600,4,"$9,700.00"


In [24]:
# export data: write the frame back out as CSV
# index=False leaves the row index (0..9) out of the file, so re-importing
# it does not produce an extra unnamed index column
car_sales.to_csv('exported-car-sales.csv', index=False)

In [25]:
# read the exported file back in to check the round trip
exported_car_sales = pandas.read_csv('exported-car-sales.csv')

In [26]:
# should show the same rows and columns as car_sales
exported_car_sales

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
1,Honda,Red,87899,4,"$5,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
3,BMW,Black,11179,5,"$22,000.00"
4,Nissan,White,213095,4,"$3,500.00"
5,Toyota,Green,99213,4,"$4,500.00"
6,Honda,Blue,45698,4,"$7,500.00"
7,Honda,Blue,54738,4,"$7,000.00"
8,Toyota,White,60000,4,"$6,250.00"
9,Nissan,White,31600,4,"$9,700.00"


## Analyzing the data

In [40]:
# see the table structure (similar to `DESCRIBE <TABLE_NAME>` in SQL):
# one dtype per column; `object` here means the column holds strings
car_sales.dtypes

Make             object
Colour           object
Odometer (KM)     int64
Doors             int64
Price            object
dtype: object

In [36]:
# the column labels, as a pandas Index
car_sales.columns

Index(['Make', 'Colour', 'Odometer (KM)', 'Doors', 'Price'], dtype='object')

In [39]:
# the row labels; a default RangeIndex (0..9) since read_csv was given no index column
car_sales.index

RangeIndex(start=0, stop=10, step=1)

In [43]:
# summary statistics (count, mean, std, min, quartiles, max) for the numeric columns only
# note: Price is missing because values like "$4,000.00" were read as strings (dtype object), not numbers
car_sales.describe()

Unnamed: 0,Odometer (KM),Doors
count,10.0,10.0
mean,78601.4,4.0
std,61983.471735,0.471405
min,11179.0,3.0
25%,35836.25,4.0
50%,57369.0,4.0
75%,96384.5,4.0
max,213095.0,5.0


In [45]:
# info() prints a one-stop overview: the index range, each column's dtype and
# non-null count, and the frame's approximate memory usage
car_sales.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Make           10 non-null     object
 1   Colour         10 non-null     object
 2   Odometer (KM)  10 non-null     int64 
 3   Doors          10 non-null     int64 
 4   Price          10 non-null     object
dtypes: int64(2), object(3)
memory usage: 528.0+ bytes


In [48]:
# get the average of every numeric column
# numeric_only=True skips the string columns (Make, Colour, Price)
car_sales.mean(numeric_only=True)

Odometer (KM)    78601.4
Doors                4.0
dtype: float64

In [52]:
# mean() is also available on a single Series, not just on a whole DataFrame
car_prices = pandas.Series(
    data=[10, 40, 30],
)
car_prices.mean()

26.666666666666668

In [57]:
# aggregate a single column: select it by label (this yields a Series), then call the method
car_sales['Doors'].sum()


40

In [60]:
# average number of doors per car
car_sales['Doors'].mean()

4.0

In [61]:
# middle value of the Doors column
car_sales['Doors'].median()

4.0

In [62]:
# most frequent value(s); mode() returns a Series because several values can tie
car_sales['Doors'].mode()

0    4
Name: Doors, dtype: int64

In [64]:
# get the number of rows (len() of a DataFrame counts rows, not columns)
len(car_sales)

10

## Viewing and selecting data

In [72]:
# head(n) returns the first n rows of a data frame (5 by default if no argument is given)
# a common practice is to change the data frame and then call head() again to check the result
car_sales.head(3)

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
1,Honda,Red,87899,4,"$5,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"


In [77]:
# tail(n) returns the last n rows (also 5 by default)
car_sales.tail(1)

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
9,Nissan,White,31600,4,"$9,700.00"
