In [1]:
import pandas as pd
import numpy as np

## Read the dataframe

In [2]:
# Load the data; the first CSV column supplies the row labels (the index).
df = pd.read_csv(
    'my_df.csv',
    index_col=0,
)

In [3]:
# Preview the first two rows
df.head(n=2)

Unnamed: 0,Quantity,Color,Price_per_kg
Apples,20,Red,3
Bananas,30,Yellow,2


In [4]:
# Preview the last two rows
df.tail(n=2)

Unnamed: 0,Quantity,Color,Price_per_kg
Cherries,15,Red,4
Dates,10,Brown,5


In [5]:
# Structural summary: index range, column names, non-null counts,
# dtypes and approximate memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, Apples to Dates
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Quantity      4 non-null      int64 
 1   Color         4 non-null      object
 2   Price_per_kg  4 non-null      int64 
dtypes: int64(2), object(1)
memory usage: 128.0+ bytes


In [6]:
# Summary statistics (count, mean, std, min, quartiles, max);
# only the numeric columns appear, the string column Color is left out
df.describe()

Unnamed: 0,Quantity,Price_per_kg
count,4.0,4.0
mean,18.75,3.5
std,8.539126,1.290994
min,10.0,2.0
25%,13.75,2.75
50%,17.5,3.5
75%,22.5,4.25
max,30.0,5.0


## Indexing the dataframe

In [7]:
# .loc selects by index *label*: here the single row labelled 'Dates',
# returned as a Series whose entries are that row's column values
df.loc['Dates']

Quantity           10
Color           Brown
Price_per_kg        5
Name: Dates, dtype: object

In [8]:
# .iloc selects by integer *position*: 0 is the first row (Apples),
# regardless of what the index labels are
df.iloc[0]

Quantity         20
Color           Red
Price_per_kg      3
Name: Apples, dtype: object

In [9]:
# Positional row slice: rows at positions 0, 1 and 2 (the stop position 3 is excluded)
df.iloc[0:3]

Unnamed: 0,Quantity,Color,Price_per_kg
Apples,20,Red,3
Bananas,30,Yellow,2
Cherries,15,Red,4


In [10]:
# First three rows of a single column; selecting one column name yields a Series
df.iloc[0:3]['Price_per_kg']

Apples      3
Bananas     2
Cherries    4
Name: Price_per_kg, dtype: int64

In [11]:
# First three rows of several columns; a list of names yields a DataFrame
# with the columns in the order listed
df.iloc[0:3][['Price_per_kg', 'Color']]

Unnamed: 0,Price_per_kg,Color
Apples,3,Red
Bananas,2,Yellow
Cherries,4,Red


## Adding, renaming, and removing columns

In [12]:
# Add a new column; the list supplies one value per row, in row order
df['Country'] = [
    'USA',     # Apples
    'USA',     # Bananas
    'USA',     # Cherries
    'Canada',  # Dates
]

In [13]:
# Display the frame, which now includes the Country column
df

Unnamed: 0,Quantity,Color,Price_per_kg,Country
Apples,20,Red,3,USA
Bananas,30,Yellow,2,USA
Cherries,15,Red,4,USA
Dates,10,Brown,5,Canada


In [14]:
# Rename the Price_per_kg column to Price.
# The result is reassigned rather than using inplace=True, which keeps the
# call chainable. Labels not present in the frame are ignored by default,
# so re-running this cell is harmless.
df = df.rename(columns={'Price_per_kg': 'Price'})

In [15]:
# Display the frame; Price_per_kg now appears as Price
df

Unnamed: 0,Quantity,Color,Price,Country
Apples,20,Red,3,USA
Bananas,30,Yellow,2,USA
Cherries,15,Red,4,USA
Dates,10,Brown,5,Canada


In [16]:
# Remove the Country column. `columns=` names the axis explicitly
# (equivalent to passing axis=1), and the result is reassigned instead of
# mutating the frame with inplace=True.
# Note: re-running this cell once the column is gone raises KeyError.
df = df.drop(columns='Country')

In [17]:
# Display the frame after the Country column has been removed
df

Unnamed: 0,Quantity,Color,Price
Apples,20,Red,3
Bananas,30,Yellow,2
Cherries,15,Red,4
Dates,10,Brown,5


## Changing the data inside columns

In [18]:
# Element-wise arithmetic returns a new Series; df itself is not modified here
df['Quantity'].mul(2)

Apples      40
Bananas     60
Cherries    30
Dates       20
Name: Quantity, dtype: int64

In [19]:
# Store the doubled values back into the column.
# Note: this cell is not idempotent; every re-run doubles Quantity again.
df['Quantity'] = 2 * df['Quantity']

In [20]:
# Display the frame; Quantity now holds the doubled values
df

Unnamed: 0,Quantity,Color,Price
Apples,40,Red,3
Bananas,60,Yellow,2
Cherries,30,Red,4
Dates,20,Brown,5


## NaN values

In [21]:
# Boolean mask with the same shape as df: True where a value is missing.
# Every entry is False here, i.e. this frame has no missing values.
df.isna()

Unnamed: 0,Quantity,Color,Price
Apples,False,False,False
Bananas,False,False,False
Cherries,False,False,False
Dates,False,False,False


## Changing the data type in a column

In [24]:
# Cast the column to half-precision floats.
# Caution: float16 has very limited precision (whole numbers are exact only
# up to 2048), so it is suitable for small demo values like these but not
# for general numeric work.
df['Quantity'] = df['Quantity'].astype(np.float16)

In [25]:
# Confirm the cast: the Series now reports dtype float16
df['Quantity']

Apples      40.0
Bananas     60.0
Cherries    30.0
Dates       20.0
Name: Quantity, dtype: float16

## Simple statistics

In [26]:
# Column-wise mean of the numeric columns only.
# The frame also holds the string column Color; without numeric_only=True
# pandas 1.x emits a FutureWarning about dropping it and pandas 2.x raises
# TypeError, so the restriction is stated explicitly.
df.mean(numeric_only=True)

  df.mean()


Quantity    37.5
Price        3.5
dtype: float64

In [27]:
# Column-wise minimum. Strings compare lexicographically, so Color yields
# 'Brown'; mixing numeric and string results gives an object-dtype Series.
df.min()

Quantity     20.0
Color       Brown
Price           2
dtype: object

In [28]:
# Column-wise maximum. Strings compare lexicographically, so Color yields
# 'Yellow'; mixing numeric and string results gives an object-dtype Series.
df.max()

Quantity      60.0
Color       Yellow
Price            5
dtype: object

In [33]:
# Clean up the temporary 'Score' column.
# NOTE(review): no cell visible in this notebook creates 'Score' (execution
# counts jump from In [28] to In [33], so the cells that added it were
# presumably deleted). A plain drop would therefore raise KeyError on
# Restart & Run All; errors='ignore' makes this a no-op when the column is
# absent. The result is reassigned instead of using inplace=True.
df = df.drop(columns='Score', errors='ignore')

In [34]:
# Display the final state of the frame
df

Unnamed: 0,Quantity,Color,Price
Apples,40.0,Red,3
Bananas,60.0,Yellow,2
Cherries,30.0,Red,4
Dates,20.0,Brown,5
