Pandas is a powerful data manipulation library in Python, widely used for data analysis and data cleaning. It provides two primary data structures: Series and DataFrame. A Series is a one-dimensional array-like object, while a DataFrame is a two-dimensional, size-mutable, and potentially heterogeneous tabular data structure with labeled axes (rows and columns).

In [2]:
!pip install pandas



In [9]:
## Series
# A pandas Series is a labelled, one-dimensional array -- think of it as a single column of a table.

import pandas as pd

# Start from a plain Python list; pandas assigns the default integer index 0..n-1.
data = [1, 2, 3, 4, 5]
series = pd.Series(data=data)

print("Series :\n", series)
print(type(series))

Series :
 0    1
1    2
2    3
3    4
4    5
dtype: int64
<class 'pandas.core.series.Series'>


In [10]:
## Create a Series from a dictionary
# The dictionary keys become the index labels and the values become the data.

data = dict(a=1, b=2, c=3)
series_dict = pd.Series(data=data)
print("Series from dictionary :\n", series_dict)

Series from dictionary :
 a    1
b    2
c    3
dtype: int64


In [12]:
# Create a Series with explicit index labels instead of the default 0..n-1 integers.
data = [10, 20, 30]
indexs = ['a', 'b', 'c']
pd.Series(data=data, index=indexs)

a    10
b    20
c    30
dtype: int64

In [14]:
## DataFrame
# Build a DataFrame from a dictionary of lists: every key is a column name
# and every list holds that column's values.

data = dict(
    Name=['Nabin', 'Rabin', 'Sabin'],
    Age=[23, 35, 40],
    City=['Kathmandu', 'Lalitpur', 'Bhaktapur'],
)

df = pd.DataFrame(data=data)
print(df)

    Name  Age       City
0  Nabin   23  Kathmandu
1  Rabin   35   Lalitpur
2  Sabin   40  Bhaktapur


In [15]:
# Confirm that the object built in the previous cell is a pandas DataFrame.
print(type(df))

<class 'pandas.core.frame.DataFrame'>


In [16]:
import numpy as np
# Convert the DataFrame into a 2-D NumPy array. The columns mix strings and
# integers, so the resulting array has dtype=object.
np.array(df)

array([['Nabin', 23, 'Kathmandu'],
       ['Rabin', 35, 'Lalitpur'],
       ['Sabin', 40, 'Bhaktapur']], dtype=object)

In [23]:
## Create a DataFrame from a list of dictionaries
# Each dictionary describes one row; its keys are used as the column names.

data = [
    dict(Name='Nabin', Age=23, City='Kathmandu'),
    dict(Name='Rabin', Age=35, City='Lalitpur'),
    dict(Name='Sabin', Age=40, City='Bhaktapur'),
]

df = pd.DataFrame(data=data)
print(df)

    Name  Age       City
0  Nabin   23  Kathmandu
1  Rabin   35   Lalitpur
2  Sabin   40  Bhaktapur


In [24]:
# A list of dictionaries also produces a pandas DataFrame.
print(type(df))

<class 'pandas.core.frame.DataFrame'>


In [25]:
# Leaving the DataFrame as the last expression of the cell makes the notebook display it (no print() needed).
df

Unnamed: 0,Name,Age,City
0,Nabin,23,Kathmandu
1,Rabin,35,Lalitpur
2,Sabin,40,Bhaktapur


In [26]:
# Select a single column by its name.
df['Name']

0    Nabin
1    Rabin
2    Sabin
Name: Name, dtype: object

In [27]:
# A single column taken from a DataFrame is a pandas Series.
type(df['Name'])

pandas.core.series.Series

In [28]:
# .loc selects by index label: this returns the row labelled 0 as a Series.
df.loc[0]

Name        Nabin
Age            23
City    Kathmandu
Name: 0, dtype: object

In [30]:
# Read a single value by row label and column name in one .loc lookup.
# Chaining a positional [0] onto the row Series (df.loc[0][0]) relies on the
# deprecated "integer key treated as a position" fallback and emits a FutureWarning.
df.loc[0, 'Name']

  df.loc[0][0]


'Nabin'

In [29]:
# .iloc selects by integer position: this returns the first row as a Series.
df.iloc[0]

Name        Nabin
Age            23
City    Kathmandu
Name: 0, dtype: object

In [31]:
# Read a single value by (row position, column position) in one .iloc lookup.
# Chaining [1] onto the row Series (df.iloc[0][1]) relies on the deprecated
# "integer key treated as a position" fallback and emits a FutureWarning.
df.iloc[0, 1]

  df.iloc[0][1]


np.int64(23)

In [32]:
## Accessing a specified element
# Show the whole 'Name' column first for reference; this returns a Series, not a single value.
df['Name']

0    Nabin
1    Rabin
2    Sabin
Name: Name, dtype: object

In [33]:
# Display the DataFrame again as a reference for the scalar lookups that follow.
df

Unnamed: 0,Name,Age,City
0,Nabin,23,Kathmandu
1,Rabin,35,Lalitpur
2,Sabin,40,Bhaktapur


In [34]:
# .at reads a single value by label: row label 1, column 'Age'.
df.at[1,'Age']

np.int64(35)

In [35]:
# Same label-based scalar lookup: row label 2, column 'City'.
df.at[2, 'City']

'Bhaktapur'

In [36]:
## Accessing a specified element using iat
# .iat reads a single value by integer position: (row 1, column 1) is Rabin's Age.

df.iat[1, 1]

np.int64(35)

In [37]:
# Position (row 2, column 2) is the 'City' value of the third row.
df.iat[2, 2]

'Bhaktapur'

In [38]:
## Data Manipulation with DataFrame
# Show the DataFrame in its current state before modifying it.
df

Unnamed: 0,Name,Age,City
0,Nabin,23,Kathmandu
1,Rabin,35,Lalitpur
2,Sabin,40,Bhaktapur


In [40]:
# Add a new column: assigning a list to a new column name creates the column,
# with one value per existing row.
df['Salary'] = [300000, 200000, 100000]
df

Unnamed: 0,Name,Age,City,Salary
0,Nabin,23,Kathmandu,300000
1,Rabin,35,Lalitpur,200000
2,Sabin,40,Bhaktapur,100000


In [41]:
# Remove a column
# axis=1 targets columns. drop() returns a new DataFrame here; because the
# result is not assigned back, df itself still contains 'Salary'.
df.drop('Salary', axis=1)

Unnamed: 0,Name,Age,City
0,Nabin,23,Kathmandu
1,Rabin,35,Lalitpur
2,Sabin,40,Bhaktapur


In [42]:
# df is unchanged: the previous drop() only returned a modified copy.
df

Unnamed: 0,Name,Age,City,Salary
0,Nabin,23,Kathmandu,300000
1,Rabin,35,Lalitpur,200000
2,Sabin,40,Bhaktapur,100000


In [43]:
# Remove the 'Salary' column for good by assigning the result back to df.
# Reassignment is preferred over inplace=True, which brings no performance
# benefit and prevents method chaining.
df = df.drop(columns='Salary')

In [44]:
# The 'Salary' column is now gone from df.
df

Unnamed: 0,Name,Age,City
0,Nabin,23,Kathmandu
1,Rabin,35,Lalitpur
2,Sabin,40,Bhaktapur


In [45]:
## Increase every value in the 'Age' column by 5
# The addition is applied element-wise to the whole column.
# Note: re-running this cell adds another 5 each time.
df['Age'] = df['Age'].add(5)
df

Unnamed: 0,Name,Age,City
0,Nabin,28,Kathmandu
1,Rabin,40,Lalitpur
2,Sabin,45,Bhaktapur


In [46]:
# Drop the row whose index label is 0 (rows are the default axis).
# The result is a new DataFrame; df itself is not modified.
df.drop(0)

Unnamed: 0,Name,Age,City
1,Rabin,40,Lalitpur
2,Sabin,45,Bhaktapur


In [47]:
# df still contains row 0 because the previous result was not assigned back.
df

Unnamed: 0,Name,Age,City
0,Nabin,28,Kathmandu
1,Rabin,40,Lalitpur
2,Sabin,45,Bhaktapur


In [48]:
# Remove the row labelled 0 for good by assigning the result back to df.
# Reassignment is preferred over inplace=True, which brings no performance
# benefit and prevents method chaining.
df = df.drop(index=0)

In [49]:
# Row 0 is gone; the remaining rows keep their original index labels (1 and 2).
df

Unnamed: 0,Name,Age,City
1,Rabin,40,Lalitpur
2,Sabin,45,Bhaktapur


In [51]:
# Load the sales dataset from a CSV file (the path is relative to the working directory).
# From here on, df refers to the sales data instead of the small example table.
df = pd.read_csv('sales_data.csv')
# Preview the first five rows (5 is the default for head()).
df.head()

Unnamed: 0,Transaction ID,Date,Product Category,Product Name,Units Sold,Unit Price,Total Revenue,Region,Payment Method
0,10001,2024-01-01,Electronics,iPhone 14 Pro,2,999.99,1999.98,North America,Credit Card
1,10002,2024-01-02,Home Appliances,Dyson V11 Vacuum,1,499.99,499.99,Europe,PayPal
2,10003,2024-01-03,Clothing,Levi's 501 Jeans,3,69.99,209.97,Asia,Debit Card
3,10004,2024-01-04,Books,The Da Vinci Code,4,15.99,63.96,North America,Credit Card
4,10005,2024-01-05,Beauty Products,Neutrogena Skincare Set,1,89.99,89.99,Europe,PayPal


In [52]:
# Preview the last five rows of the dataset.
df.tail(5)

Unnamed: 0,Transaction ID,Date,Product Category,Product Name,Units Sold,Unit Price,Total Revenue,Region,Payment Method
235,10236,2024-08-23,Home Appliances,Nespresso Vertuo Next Coffee and Espresso Maker,1,159.99,159.99,Europe,PayPal
236,10237,2024-08-24,Clothing,Nike Air Force 1 Sneakers,3,90.0,270.0,Asia,Debit Card
237,10238,2024-08-25,Books,The Handmaid's Tale by Margaret Atwood,3,10.99,32.97,North America,Credit Card
238,10239,2024-08-26,Beauty Products,Sunday Riley Luna Sleeping Night Oil,1,55.0,55.0,Europe,PayPal
239,10240,2024-08-27,Sports,Yeti Rambler 20 oz Tumbler,2,29.99,59.98,Asia,Credit Card


In [57]:
# Display the data type of each column (text columns, including Date, are loaded as object)
print("Data type of each column :\n", df.dtypes)
# print('\n') emits two blank lines to separate the two reports
print('\n')
# Describe the DataFrame: count, mean, std, min, quartiles and max of the numeric columns
print("Statistical summary of the DataFrame :\n", df.describe())

Data type of each column :
 Transaction ID        int64
Date                 object
Product Category     object
Product Name         object
Units Sold            int64
Unit Price          float64
Total Revenue       float64
Region               object
Payment Method       object
dtype: object


Statistical summary of the DataFrame :
        Transaction ID  Units Sold   Unit Price  Total Revenue
count       240.00000  240.000000   240.000000     240.000000
mean      10120.50000    2.158333   236.395583     335.699375
std          69.42622    1.322454   429.446695     485.804469
min       10001.00000    1.000000     6.500000       6.500000
25%       10060.75000    1.000000    29.500000      62.965000
50%       10120.50000    2.000000    89.990000     179.970000
75%       10180.25000    3.000000   249.990000     399.225000
max       10240.00000   10.000000  3899.990000    3899.990000


In [58]:
# Same statistical summary as above, shown as the cell's result instead of printed text.
df.describe()

Unnamed: 0,Transaction ID,Units Sold,Unit Price,Total Revenue
count,240.0,240.0,240.0,240.0
mean,10120.5,2.158333,236.395583,335.699375
std,69.42622,1.322454,429.446695,485.804469
min,10001.0,1.0,6.5,6.5
25%,10060.75,1.0,29.5,62.965
50%,10120.5,2.0,89.99,179.97
75%,10180.25,3.0,249.99,399.225
max,10240.0,10.0,3899.99,3899.99
