# Pandas DataFrame and Series
-------

Pandas is a powerful data manipulation library in Python, widely used for data analysis and data cleaning. It provides two primary data structures: Series and DataFrame. A Series is a one-dimensional (1D) array-like object, while a DataFrame is a two-dimensional (2D), size-mutable, and potentially heterogeneous tabular data structure with labeled axes (rows and columns).

In [2]:
# importing libraries
import numpy as np
import pandas as pd

## Series
-----
A pandas Series is a 1D array-like object that can hold any data type. It is similar to a single column in a table.

In [6]:
## Series from a list
data = [1, 2, 3, 4, 5]
series = pd.Series(data)    # no index passed, so the rows get the default labels 0..4

# sep="\n" puts the Series on its own line. With print's default " " separator
# a stray space follows the "\n" and pushes the first row out of alignment.
print("Series:", series, sep="\n")
print(type(series))

Series:
 0    1
1    2
2    3
3    4
4    5
dtype: int64
<class 'pandas.core.series.Series'>


In [7]:
## Series from a dictionary: the keys become the index labels
data = dict(a=1, b=2, c=3)
pd.Series(data=data)

a    1
b    2
c    3
dtype: int64

In [8]:
## Series with explicit index labels
data = [10, 20, 30]
i = list('abc')     # index labels, one per value: ['a', 'b', 'c']

pd.Series(data=data, index=i)

a    10
b    20
c    30
dtype: int64

## DataFrame
-----

In [9]:
## Create a DataFrame from a dictionary of lists
## (each key is a column name, each list holds that column's values)

data = dict(
    Name=['Talha', 'Tahmid', 'Raccoon'],
    Age=[23, 20, 3],
    Address=['Dhaka', 'Khulna', 'Amazon'],
)

df = pd.DataFrame(data=data)
print(df)
print(type(df))

      Name  Age Address
0    Talha   23   Dhaka
1   Tahmid   20  Khulna
2  Raccoon    3  Amazon
<class 'pandas.core.frame.DataFrame'>


In [10]:
## Convert the DataFrame to a NumPy array
# to_numpy() is the DataFrame's own conversion method. Because the columns mix
# strings and integers, the resulting 2D array has dtype=object.
df.to_numpy()

array([['Talha', 23, 'Dhaka'],
       ['Tahmid', 20, 'Khulna'],
       ['Raccoon', 3, 'Amazon']], dtype=object)

In [11]:
## Create a DataFrame from a list of dictionaries
## (each dictionary is one row; its keys are the column names)

data = [
    dict(Name='Talha', Age=23, Address='Kishoreganj'),
    dict(Name='Tahmid', Age=19, Address='Khulna'),
    dict(Name='Cheetah', Age=2, Address='Sahara'),
    dict(Name='Milan', Age=32, Address='France'),
]

df = pd.DataFrame(data=data)
print(df)
print(type(df))

      Name  Age      Address
0    Talha   23  Kishoreganj
1   Tahmid   19       Khulna
2  Cheetah    2       Sahara
3    Milan   32       France
<class 'pandas.core.frame.DataFrame'>


In [12]:
# A bare `df` as the last expression makes the notebook render it as a table
df

Unnamed: 0,Name,Age,Address
0,Talha,23,Kishoreganj
1,Tahmid,19,Khulna
2,Cheetah,2,Sahara
3,Milan,32,France


In [15]:
### Accessing data elements
# Selecting a single column with [] returns a Series, as the type() call shows
print(df['Name'])
type(df['Name'])

0      Talha
1     Tahmid
2    Cheetah
3      Milan
Name: Name, dtype: object


pandas.core.series.Series

In [19]:
### Row access by index label
# .loc selects by label; 0 here is the index label of the first row
df.loc[0]

Name             Talha
Age                 23
Address    Kishoreganj
Name: 0, dtype: object

In [None]:
### Row access by integer position
# .iloc selects by position, so iloc[0] is the first ROW (not a column).
# It matches .loc[0] here only because the index labels are 0, 1, 2, ...
df.iloc[0]

Name             Talha
Age                 23
Address    Kishoreganj
Name: 0, dtype: object

In [24]:
## Accessing a Specified Element
# .at reads one scalar value, addressed by row label and column name
print(df.at[1, 'Age'])      # at[row_label, column_name]
print(df.at[2, 'Name'])

19
Cheetah


In [25]:
## Accessing a specified element using iat
# iat[row_position, column_position]: third row, third column ('Address')
df.iat[2, 2]

'Sahara'

In [26]:
# Show the current state of the DataFrame
df

Unnamed: 0,Name,Age,Address
0,Talha,23,Kishoreganj
1,Tahmid,19,Khulna
2,Cheetah,2,Sahara
3,Milan,32,France


In [None]:
### Data Manipulation with Dataframe
## Adding a Column
# Assigning a list to a new column name adds that column to `df`;
# the list supplies one value per row (4 rows here).
df['Salary'] = [5000, 10000, 0, 50000]

df

Unnamed: 0,Name,Age,Address,Salary
0,Talha,23,Kishoreganj,5000
1,Tahmid,19,Khulna,10000
2,Cheetah,2,Sahara,0
3,Milan,32,France,50000


In [31]:
## Remove a column

## It is not permanent: drop() returns a new DataFrame and leaves `df` unchanged.
## The returned frame is discarded here, so this line has no lasting effect.
df.drop('Salary', axis=1)       # axis = 0 > index
                                # axis = 1 > columns

# to make it permanent we add another parameter called `inplace=True`
# (an alternative is to rebind the result: df = df.drop('Salary', axis=1))
df.drop('Salary', axis=1, inplace=True)

In [32]:
# Confirm that the 'Salary' column is gone
df

Unnamed: 0,Name,Age,Address
0,Talha,23,Kishoreganj
1,Tahmid,19,Khulna
2,Cheetah,2,Sahara
3,Milan,32,France


In [None]:
## Increment column value
# Adds 1 to every value in 'Age' in one vectorized operation and stores it back.
# NOTE: not idempotent -- every re-run of this cell increments 'Age' again.
df['Age'] = df['Age'] + 1
df

Unnamed: 0,Name,Age,Address
0,Talha,24,Kishoreganj
1,Tahmid,20,Khulna
2,Cheetah,3,Sahara
3,Milan,33,France


In [None]:
## Remove a row
# Drops the row whose index label is 0 and modifies `df` directly (inplace=True).
# NOTE: re-running this cell fails with KeyError because label 0 is already gone.
df.drop(0, inplace=True)