In [1]:
import pandas as pd


In [2]:
# A Series is a one-dimensional labelled array; with no index given,
# pandas assigns the default integer labels 0, 1, 2.
series = pd.Series(data=["BMW", "AUDI", "Honda"])
colors = pd.Series(data=["Red", "Yellow", "Orange"])

In [3]:
series  # a bare expression on the last line of a cell is shown as the cell's output

0      BMW
1     AUDI
2    Honda
dtype: object

In [4]:
# A DataFrame is two-dimensional (rows and columns) and is used far more
# often than a standalone Series. Each dict key becomes a column name and
# each Series supplies that column's values.
car_data = pd.DataFrame({"Car make": series, "Color": colors})

In [5]:
car_data  # display the DataFrame built from the two Series

Unnamed: 0,Car make,Color
0,BMW,Red
1,AUDI,Yellow
2,Honda,Orange


In [6]:
# Import data from a CSV file into a DataFrame.
# The path is relative, so "car-sales.csv" must sit in the notebook's working directory.
car_sales = pd.read_csv("car-sales.csv")


In [7]:
car_sales  # display the whole frame (it is only 10 rows; prefer .head() on larger data)

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
1,Honda,Red,87899,4,"$5,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
3,BMW,Black,11179,5,"$22,000.00"
4,Nissan,White,213095,4,"$3,500.00"
5,Toyota,Green,99213,4,"$4,500.00"
6,Honda,Blue,45698,4,"$7,500.00"
7,Honda,Blue,54738,4,"$7,000.00"
8,Toyota,White,60000,4,"$6,250.00"
9,Nissan,White,31600,4,"$9,700.00"


In [12]:
# Export the DataFrame to CSV. Pass index=False so the row index is not
# written as an extra column (it would come back as "Unnamed: 0" when the
# file is read again).
car_sales.to_csv("exported-car-sales.csv", index=False)


In [13]:
# dtype of each column. Price is `object` (text) rather than numeric because
# its values are formatted strings such as "$4,000.00".
car_sales.dtypes

Make             object
Colour           object
Odometer (KM)     int64
Doors             int64
Price            object
dtype: object

In [14]:
car_sales.columns  # column labels, returned as a pandas Index

Index(['Make', 'Colour', 'Odometer (KM)', 'Doors', 'Price'], dtype='object')

In [15]:
# Summary statistics. By default describe() covers only the numeric columns
# (here Odometer (KM) and Doors); the text columns are left out.
car_sales.describe()

Unnamed: 0,Odometer (KM),Doors
count,10.0,10.0
mean,78601.4,4.0
std,61983.471735,0.471405
min,11179.0,3.0
25%,35836.25,4.0
50%,57369.0,4.0
75%,96384.5,4.0
max,213095.0,5.0


In [16]:
# Prints a concise overview: the index range, each column's non-null count
# and dtype, and the frame's memory usage.
car_sales.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Make           10 non-null     object
 1   Colour         10 non-null     object
 2   Odometer (KM)  10 non-null     int64 
 3   Doors          10 non-null     int64 
 4   Price          10 non-null     object
dtypes: int64(2), object(3)
memory usage: 528.0+ bytes


In [17]:
# Column-wise mean. The frame also holds text columns (Make, Colour, Price),
# so restrict the calculation to numeric columns explicitly: pandas >= 2.0
# raises a TypeError instead of silently skipping non-numeric columns.
car_sales.mean(numeric_only=True)

Odometer (KM)    78601.4
Doors                4.0
dtype: float64

In [18]:
# sum() adds up the numeric columns, but on text (object) columns it
# concatenates the strings -- hence "ToyotaHondaToyota..." in the output.
car_sales.sum()

Make             ToyotaHondaToyotaBMWNissanToyotaHondaHondaToyo...
Colour               WhiteRedBlueBlackWhiteGreenBlueBlueWhiteWhite
Odometer (KM)                                               786014
Doors                                                           40
Price            $4,000.00$5,000.00$7,000.00$22,000.00$3,500.00...
dtype: object

In [19]:
car_sales["Doors"].sum()  # sum of a single, specific column

40

In [20]:
len(car_sales)  # length of the DataFrame, i.e. its number of rows

10

# Viewing and selecting data

In [21]:
car_sales.head()  # quick snapshot of the data: the first 5 rows by default

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
1,Honda,Red,87899,4,"$5,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
3,BMW,Black,11179,5,"$22,000.00"
4,Nissan,White,213095,4,"$3,500.00"


In [22]:
car_sales.head(7)  # pass a number to choose how many rows are shown (here the first 7)

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
1,Honda,Red,87899,4,"$5,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
3,BMW,Black,11179,5,"$22,000.00"
4,Nissan,White,213095,4,"$3,500.00"
5,Toyota,Green,99213,4,"$4,500.00"
6,Honda,Blue,45698,4,"$7,500.00"


In [23]:
car_sales.tail(3)  # the counterpart of head(): the last 3 rows

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
7,Honda,Blue,54738,4,"$7,000.00"
8,Toyota,White,60000,4,"$6,250.00"
9,Nissan,White,31600,4,"$9,700.00"


In [26]:
# Custom index labels do not have to be unique: "lion" and "elephant"
# deliberately share the label 3 to contrast .loc (label) with .iloc (position).
animals = pd.Series(
    data=["cat", "dog", "lion", "elephant"],
    index=[1, 2, 3, 3],
)

In [27]:
animals  # note that the index label 3 appears twice

1         cat
2         dog
3        lion
3    elephant
dtype: object

In [28]:
# .loc selects by index *label*: every row whose label is 3 is returned,
# so the duplicated label yields two rows.
animals.loc[3]

3        lion
3    elephant
dtype: object

In [29]:
# .iloc selects by integer *position* (counting from 0), regardless of the
# index labels: position 3 is the fourth element.
animals.iloc[3]

'elephant'

In [30]:
# Positional slice: positions start at 0 and the end of the slice is
# exclusive, so [:3] returns positions 0, 1 and 2 (position 3 is not included).
animals.iloc[:3]

1     cat
2     dog
3    lion
dtype: object

In [32]:
# Boolean indexing: the comparison produces a True/False mask per row and
# only the rows where Make equals "Toyota" are kept (original labels preserved).
car_sales[car_sales["Make"] == "Toyota"]

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
5,Toyota,Green,99213,4,"$4,500.00"
8,Toyota,White,60000,4,"$6,250.00"


In [33]:
# Filtering rows with a numeric condition. Every car in this dataset has more
# than 10000 km on the odometer, so all 10 rows pass the filter.
car_sales[car_sales["Odometer (KM)"] > 10000]

Unnamed: 0,Make,Colour,Odometer (KM),Doors,Price
0,Toyota,White,150043,4,"$4,000.00"
1,Honda,Red,87899,4,"$5,000.00"
2,Toyota,Blue,32549,3,"$7,000.00"
3,BMW,Black,11179,5,"$22,000.00"
4,Nissan,White,213095,4,"$3,500.00"
5,Toyota,Green,99213,4,"$4,500.00"
6,Honda,Blue,45698,4,"$7,500.00"
7,Honda,Blue,54738,4,"$7,000.00"
8,Toyota,White,60000,4,"$6,250.00"
9,Nissan,White,31600,4,"$9,700.00"
