### Pandas-DataFrame And Series

Pandas is a powerful data manipulation library in python, widely used for data analysis and data cleaning. It provides two primary data structures: Series and DataFrame. A Series is one-dimensional array-like object, while a DataFrame is a two-dimensional, size-mutable, and potentially heterogeneous tabular data structure with labeled axes (rows and columns).

In [1]:
import pandas as pd

In [2]:
## Series ---> A 1-D array like object that can hold any data type.
import pandas as pd
data = [1,2,3,4,5]
series = pd.Series(data)
print(series)
print(type(series))

0    1
1    2
2    3
3    4
4    5
dtype: int64
<class 'pandas.core.series.Series'>


In [3]:
## create a series from dictionary elements

data = {'a': 1, 'b':2, 'c':3}
series_dict = pd.Series(data)
print(series_dict)

a    1
b    2
c    3
dtype: int64


In [4]:
data = [10,20,30]
index = ['a','b','c']
pd.Series(data,index)

a    10
b    20
c    30
dtype: int64

In [5]:
## DataFrame

## create a dataframe from a dictionary of list
data = {
    'name':['anush','puneet','jack','david'],
    'Age':[34,45,32,56],
    'city': ['Monton','chicago','Florida','Toronto']
}
df = pd.DataFrame(data)
print(df)
print(type(df))

     name  Age     city
0   anush   34   Monton
1  puneet   45  chicago
2    jack   32  Florida
3   david   56  Toronto
<class 'pandas.core.frame.DataFrame'>


In [6]:
## create a dataframe from a list of dictionaries

data = [
    {'name':'anush', 'Age':34, 'City':'Monton'},
    {'name':'puneet', 'Age':45, 'City':'Chicago'},
    {'name':'jack', 'Age':32, 'City':'Florida'},
    {'name':'david', 'Age':56, 'City':'Ohio'},
]
df = pd.DataFrame(data)
print(df)

     name  Age     City
0   anush   34   Monton
1  puneet   45  Chicago
2    jack   32  Florida
3   david   56     Ohio


In [7]:
df = pd.read_csv("coffee_shop_sales.csv")

In [14]:
df.head(5)

Unnamed: 0,Date,Day,Branch,Drink,Size,Quantity,Price,Total_Sale,Customer_Rating
0,2025-10-01,Wednesday,The Bean Spot,Latte,Small,5,180,900,4.2
1,2025-10-02,Thursday,Brew Bliss,Flat White,Large,1,150,150,4.2
2,2025-10-03,Friday,Latte Lounge,Americano,Small,5,180,900,3.4
3,2025-10-04,Saturday,Brew Bliss,Americano,Small,4,150,600,3.9
4,2025-10-05,Sunday,The Bean Spot,Americano,Large,2,120,240,4.0


In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Date             50 non-null     object 
 1   Day              50 non-null     object 
 2   Branch           50 non-null     object 
 3   Drink            50 non-null     object 
 4   Size             50 non-null     object 
 5   Quantity         50 non-null     int64  
 6   Price            50 non-null     int64  
 7   Total_Sale       50 non-null     int64  
 8   Customer_Rating  50 non-null     float64
dtypes: float64(1), int64(3), object(5)
memory usage: 3.6+ KB


In [8]:
df.describe()

Unnamed: 0,Quantity,Price,Total_Sale,Customer_Rating
count,50.0,50.0,50.0,50.0
mean,2.92,168.8,502.2,3.91
std,1.35285,35.550481,273.027546,0.536143
min,1.0,120.0,120.0,3.0
25%,2.0,150.0,255.0,3.5
50%,3.0,165.0,450.0,3.9
75%,4.0,200.0,660.0,4.4
max,5.0,220.0,1100.0,5.0


In [15]:
df.tail(5)

Unnamed: 0,Date,Day,Branch,Drink,Size,Quantity,Price,Total_Sale,Customer_Rating
45,2025-11-15,Saturday,The Bean Spot,Flat White,Medium,2,200,400,4.7
46,2025-11-16,Sunday,Brew Bliss,Americano,Medium,5,220,1100,3.7
47,2025-11-17,Monday,Brew Bliss,Cappuccino,Medium,2,150,300,3.5
48,2025-11-18,Tuesday,Latte Lounge,Espresso,Large,1,200,200,4.7
49,2025-11-19,Wednesday,The Bean Spot,Flat White,Large,4,200,800,3.3


In [17]:
## Accessing data from data frame

df["Branch"]

0     The Bean Spot
1        Brew Bliss
2      Latte Lounge
3        Brew Bliss
4     The Bean Spot
5     The Bean Spot
6        Brew Bliss
7     The Bean Spot
8        Cafe Aroma
9        Cafe Aroma
10       Brew Bliss
11     Latte Lounge
12       Cafe Aroma
13       Cafe Aroma
14    The Bean Spot
15       Brew Bliss
16    The Bean Spot
17       Brew Bliss
18       Brew Bliss
19     Latte Lounge
20       Cafe Aroma
21    The Bean Spot
22       Brew Bliss
23       Brew Bliss
24       Brew Bliss
25       Brew Bliss
26       Cafe Aroma
27       Brew Bliss
28     Latte Lounge
29     Latte Lounge
30     Latte Lounge
31    The Bean Spot
32       Brew Bliss
33     Latte Lounge
34     Latte Lounge
35       Cafe Aroma
36       Brew Bliss
37       Brew Bliss
38    The Bean Spot
39     Latte Lounge
40       Brew Bliss
41       Cafe Aroma
42     Latte Lounge
43       Cafe Aroma
44    The Bean Spot
45    The Bean Spot
46       Brew Bliss
47       Brew Bliss
48     Latte Lounge
49    The Bean Spot


In [None]:
df.loc[0]   ## row index

Date                  2025-10-01
Day                    Wednesday
Branch             The Bean Spot
Drink                      Latte
Size                       Small
Quantity                       5
Price                        180
Total_Sale                   900
Customer_Rating              4.2
Name: 0, dtype: object

In [None]:
df.iloc[0][2]       ## column index

  df.iloc[0][2]


'The Bean Spot'

In [None]:
## Accessing a specified element
df.at[3, "Branch"]  ## fetches the value at particular position

'Brew Bliss'

In [23]:
## iat

df.iat[2,2]

'Latte Lounge'

In [24]:
## data manipulation with dataframes
data = {
    'name':['anush','puneet','jack','david'],
    'Age':[34,45,32,56],
    'city': ['Monton','chicago','Florida','Toronto']
}
df = pd.DataFrame(data)
print(df)


     name  Age     city
0   anush   34   Monton
1  puneet   45  chicago
2    jack   32  Florida
3   david   56  Toronto


In [46]:
df['Salary'] = [150000, 100000, 90000, 200000]
df

Unnamed: 0,name,Age,city,Salary
0,anush,36,Monton,150000
1,puneet,47,chicago,100000
2,jack,34,Florida,90000
3,david,58,Toronto,200000


In [47]:
df.drop('Salary', axis = 1)       ## by default axis = 0 (row)
## drop removes a column temporarily

Unnamed: 0,name,Age,city
0,anush,36,Monton
1,puneet,47,chicago
2,jack,34,Florida
3,david,58,Toronto


In [48]:
df

Unnamed: 0,name,Age,city,Salary
0,anush,36,Monton,150000
1,puneet,47,chicago,100000
2,jack,34,Florida,90000
3,david,58,Toronto,200000


In [49]:
df.drop('Salary', axis = 1, inplace = True)     ## removes permanently

In [50]:
df

Unnamed: 0,name,Age,city
0,anush,36,Monton
1,puneet,47,chicago
2,jack,34,Florida
3,david,58,Toronto


In [51]:
## Add age to the column
df['Age'] = df['Age']+1
df

Unnamed: 0,name,Age,city
0,anush,37,Monton
1,puneet,48,chicago
2,jack,35,Florida
3,david,59,Toronto


In [53]:
df.describe()

Unnamed: 0,Age
count,4.0
mean,44.75
std,11.086779
min,35.0
25%,36.5
50%,42.5
75%,50.75
max,59.0
