In [2]:
import numpy as np
import pandas as pd

#### Creating DataFrame

In [3]:
data = {
    'Name': ['John', 'Anna', 'Peter', 'Linda'],
    'Age': [28, 34, 29, 42],
    'City': ['New York', 'Paris', 'Berlin', 'London'],
    'Salary': [65000, 70000, 62000, 850001]
    }

df = pd.DataFrame(data)
df

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,850001


In [4]:
data_list = [
    ['John', 28, 'New York', 65000],
    ['Anna', 34, 'Paris', 70000],
    ['Peter', 29, 'Berlin', 62000],
    ['Linda', 42, 'London', 85000],
    ]

df2 = pd.DataFrame(data_list)
df2

Unnamed: 0,0,1,2,3
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


#### Adding Columns to data_list

In [5]:
columns = ['Name', 'Age', 'City', 'Salary']
df2 = pd.DataFrame(data_list, columns=columns)
df2

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


#### Selection and Indexing of Columns.

In [6]:
df2['Salary']  # Access a column
df2[['Name', 'Age']]  # Access multiple columns

Unnamed: 0,Name,Age
0,John,28
1,Anna,34
2,Peter,29
3,Linda,42


#### Creating a new column

In [7]:
df2['Role'] = ['IT Engineer', 'Soft. Engineer', 'Doctor', 'Soft. Engineer']
df2

Unnamed: 0,Name,Age,City,Salary,Role
0,John,28,New York,65000,IT Engineer
1,Anna,34,Paris,70000,Soft. Engineer
2,Peter,29,Berlin,62000,Doctor
3,Linda,42,London,85000,Soft. Engineer


#### Delete a column

In [8]:
df2.drop('Role', axis=1)

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


It just give the copy of deleted dataFrame not actually delete it.

In [9]:
df2

Unnamed: 0,Name,Age,City,Salary,Role
0,John,28,New York,65000,IT Engineer
1,Anna,34,Paris,70000,Soft. Engineer
2,Peter,29,Berlin,62000,Doctor
3,Linda,42,London,85000,Soft. Engineer


To permanently delete use inplace=True

In [10]:
df2.drop('Role', axis=1, inplace=True)
df2

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


#### Selecting a Row

In [11]:
df2.loc[0]  # Access a row
df2.loc[[0,2]]  # Access multiple rows

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
2,Peter,29,Berlin,62000


#### Selecting rows by position

In [12]:
df2.iloc[3]  # Access a row
df2.iloc[[2,3]]  # Access multiple rows

Unnamed: 0,Name,Age,City,Salary
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [13]:
df2

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


#### Creating a new row

In [14]:
df2.loc[4] = ['Dipesh', 22, 'Pune', 40000] 
df2

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000
4,Dipesh,22,Pune,40000


#### Delete a row

In [15]:
df2.drop(4, axis=0, inplace=True)
df2

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


#### Selecting Subsets of Rows and Columns

In [16]:
df2.loc[[1,3]][['Name', 'Salary']]

Unnamed: 0,Name,Salary
1,Anna,70000
3,Linda,85000


In [17]:
df2

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


#### Conditional Selection

I only want to see those people whose age is above 30

In [18]:
df2[df2['Age'] > 30]

Unnamed: 0,Name,Age,City,Salary
1,Anna,34,Paris,70000
3,Linda,42,London,85000


I only want poeple whose age is above 30 and their city must be paris

In [19]:
df2[(df2['Age'] > 30) & (df2['City'] == 'Paris')]

Unnamed: 0,Name,Age,City,Salary
1,Anna,34,Paris,70000


#### First 5 rows

In [20]:
df2.head()

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


#### Last 2 rows

In [21]:
df2.tail(2)

Unnamed: 0,Name,Age,City,Salary
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


#### Summary statistics

In [22]:
df2.describe()

Unnamed: 0,Age,Salary
count,4.0,4.0
mean,33.25,70500.0
std,6.396614,10214.368964
min,28.0,62000.0
25%,28.75,64250.0
50%,31.5,67500.0
75%,36.0,73750.0
max,42.0,85000.0


#### Structure info

In [23]:
df2.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, 0 to 3
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    4 non-null      object
 1   Age     4 non-null      int64 
 2   City    4 non-null      object
 3   Salary  4 non-null      int64 
dtypes: int64(2), object(2)
memory usage: 332.0+ bytes
