In [None]:
## pandas is a data manipulation library
## It is widely used for data analysis and data cleaning tasks
## It provides data structures such as Series and DataFrame to handle structured data efficiently
## A Series is a one-dimensional, array-like object
## A DataFrame is a two-dimensional, size-mutable, and potentially heterogeneous tabular data structure with labeled axes (rows and columns)

In [11]:

import pandas as pd
import numpy as np

In [6]:
# A Series is comparable to a single column of a table.
# Without an explicit index, pandas labels the values 0, 1, 2, ...
series = pd.Series([10, 20, 30, 40, 50])
print(series)

0    10
1    20
2    30
3    40
4    50
dtype: int64


In [7]:
## Build a Series from a dictionary: the keys become the index labels
## and the values become the data.
series2 = pd.Series({'a': 10, 'b': 20, 'c': 30})
print(series2)

a    10
b    20
c    30
dtype: int64


In [8]:
## Build a Series from a list of values plus a matching list of index labels
labels = ['a', 'b', 'c']
values = [10, 20, 30]
series3 = pd.Series(values, index=labels)
print(series3)

a    10
b    20
c    30
dtype: int64


In [10]:
## DataFrame
## Build a DataFrame from a dictionary of lists:
## each key is a column name and each list holds that column's values.
names = ['Alice', 'Bob', 'Charlie']
ages = [25, 30, 35]
cities = ['New York', 'Los Angeles', 'Chicago']
df = pd.DataFrame({'Name': names, 'Age': ages, 'City': cities})
print(df)

      Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago


In [13]:
## Convert the DataFrame to a NumPy array using pandas' own conversion method.
## The columns mix text and integers, so every cell ends up in one shared
## 2-D array (one inner row per DataFrame row); the column names are not kept.
arr = df.to_numpy()
print(arr)

[['Alice' 25 'New York']
 ['Bob' 30 'Los Angeles']
 ['Charlie' 35 'Chicago']]


In [14]:
## Build a DataFrame from a list of dictionaries:
## each dictionary is one row, and its keys are the column names.
columns = ('Name', 'Age', 'City')
rows = [
    ('David', 28, 'Miami'),
    ('Eva', 22, 'Seattle'),
    ('Frank', 33, 'Boston'),
]
records = [dict(zip(columns, row)) for row in rows]
df2 = pd.DataFrame(records)
print(df2)

    Name  Age     City
0  David   28    Miami
1    Eva   22  Seattle
2  Frank   33   Boston


In [21]:
## Load the e-commerce customer data from a CSV file in the working directory
## and preview the first five rows (head() defaults to n=5).
df3 = pd.read_csv('ecommerce.csv')
df3.head()

Unnamed: 0,customer_id,age,gender,country,signup_date,last_purchase_date,num_orders,total_spent,avg_order_value,recency_days,is_premium_member,device_type,preferred_category,churned
0,10001,56.0,Male,India,2022-01-27,2024-04-27,10,13273.25,1327.32,522,0,,Home,0
1,10002,69.0,Other,UK,2025-01-12,2025-08-01,10,,308.9,61,0,Mobile,Grocery,0
2,10003,46.0,Female,USA,2021-04-25,2021-06-01,5,3411.83,682.37,1583,0,,,0
3,10004,32.0,Male,India,2021-02-23,2023-09-22,5,4502.0,900.4,740,0,,,0
4,10005,60.0,Female,India,2021-06-07,2021-12-11,7,3842.1,548.87,1390,0,Tablet,Books,0


In [22]:
## Preview the last five rows (tail() defaults to n=5)
df3.tail()

Unnamed: 0,customer_id,age,gender,country,signup_date,last_purchase_date,num_orders,total_spent,avg_order_value,recency_days,is_premium_member,device_type,preferred_category,churned
600,10358,25.0,Female,India,2023-09-04,2025-06-18,7,,0.0,105,1,Mobile,Electronics,0
601,10117,50.0,Male,USA,2024-05-11,2024-11-08,4,3640.07,910.02,327,0,Mobile,Electronics,0
602,10599,46.0,Male,India,2020-12-01,2024-06-25,6,2176.22,362.7,463,1,Desktop,Electronics,0
603,10442,32.0,Female,UK,2023-12-20,2024-09-20,3,5478.65,1826.22,376,0,Mobile,Home,0
604,10555,44.0,Female,Germany,2024-06-16,2024-12-09,6,5408.13,901.36,296,0,Mobile,Fashion,0


In [37]:
### Accessing data in a DataFrame
## A notebook cell only echoes its LAST expression, so the column lookup is
## shown with an explicit display() call; as a bare statement its result
## would be silently discarded.
display(df3['country'].head())  ## a single column is returned as a Series
df3.loc[0]  ## a single row, selected by its index label



customer_id                10001
age                         56.0
gender                      Male
country                    India
signup_date           2022-01-27
last_purchase_date    2024-04-27
num_orders                    10
total_spent             13273.25
avg_order_value          1327.32
recency_days                 522
is_premium_member              0
device_type                  NaN
preferred_category          Home
churned                        0
Name: 0, dtype: object

In [35]:
df3.iloc[0]  ## access a single row by integer position (0 = first row)

customer_id                10001
age                         56.0
gender                      Male
country                    India
signup_date           2022-01-27
last_purchase_date    2024-04-27
num_orders                    10
total_spent             13273.25
avg_order_value          1327.32
recency_days                 522
is_premium_member              0
device_type                  NaN
preferred_category          Home
churned                        0
Name: 0, dtype: object

In [None]:
## Accessing a single element
df3.at[1, 'country']  ## scalar lookup by row label 1 and column label 'country'


'UK'

In [41]:
df3.iat[1, 3]  ## scalar lookup by position: row position 1, column position 3 ('country' is the fourth column)

'UK'

In [48]:
## Data manipulation with DataFrame
## Adding a new column: assigning a list to a new column name creates it.
## The list needs one value per row of df.
salaries = [50000, 60000, 70000]
df['Salary'] = salaries
df


Unnamed: 0,Name,Age,City,Salary
0,Alice,25,New York,50000
1,Bob,30,Los Angeles,60000
2,Charlie,35,Chicago,70000


In [49]:
## Removing a column
## drop() removes rows by default (axis=0); columns=... is the explicit
## equivalent of passing the label with axis=1.
## The result is reassigned instead of using inplace=True, so the statement
## reads as "df becomes df without 'Salary'".
df = df.drop(columns='Salary')
df

Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,Los Angeles
2,Charlie,35,Chicago


In [50]:
## Increase the age by 1 for all rows (vectorized over the whole column).
## Each run of this cell adds another year, so run it only once.
df['Age'] = df['Age'].add(1)
df

Unnamed: 0,Name,Age,City
0,Alice,26,New York
1,Bob,31,Los Angeles
2,Charlie,36,Chicago


In [51]:
## Removing a row: index=0 drops the row whose index LABEL is 0
## (the explicit equivalent of passing the label with axis=0).
## The result is reassigned instead of using inplace=True.
df = df.drop(index=0)
df

Unnamed: 0,Name,Age,City
1,Bob,31,Los Angeles
2,Charlie,36,Chicago


In [None]:
df3.describe()  ## statistical summary (count, mean, std, min, quartiles, max) of the numerical columns


Unnamed: 0,customer_id,age,num_orders,total_spent,avg_order_value,recency_days,is_premium_member,churned
count,605.0,577.0,605.0,571.0,605.0,605.0,605.0,605.0
mean,10301.439669,44.175043,5.945455,8059.212137,1279.377471,456.029752,0.294215,0.036364
std,173.636071,14.978765,2.332892,10346.781856,1123.84184,405.810389,0.456066,0.187348
min,10001.0,18.0,0.0,0.0,0.0,1.0,0.0,0.0
25%,10151.0,32.0,4.0,3920.64,795.78,128.0,0.0,0.0
50%,10302.0,45.0,6.0,6533.98,1152.53,327.0,0.0,0.0
75%,10451.0,56.0,7.0,9523.01,1615.03,696.0,1.0,0.0
max,10600.0,69.0,13.0,123891.81,11431.9,1685.0,1.0,1.0


In [53]:
df3.info()  ## summary of the DataFrame: column dtypes, non-null counts, and memory usage

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 605 entries, 0 to 604
Data columns (total 14 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   customer_id         605 non-null    int64  
 1   age                 577 non-null    float64
 2   gender              571 non-null    object 
 3   country             573 non-null    object 
 4   signup_date         605 non-null    object 
 5   last_purchase_date  605 non-null    object 
 6   num_orders          605 non-null    int64  
 7   total_spent         571 non-null    float64
 8   avg_order_value     605 non-null    float64
 9   recency_days        605 non-null    int64  
 10  is_premium_member   605 non-null    int64  
 11  device_type         575 non-null    object 
 12  preferred_category  583 non-null    object 
 13  churned             605 non-null    int64  
dtypes: float64(3), int64(5), object(6)
memory usage: 66.3+ KB
