In [1]:
import pandas as pd

prices = [2999, 15900, 52000, 4999, 1900]
products = ["Wireless Earbuds", "Smartphone", "Laptop", "Smartwatch", "Speaker"]
product_prices = pd.Series(prices, index=products)
print(product_prices)

Wireless Earbuds     2999
Smartphone          15900
Laptop              52000
Smartwatch           4999
Speaker              1900
dtype: int64


In [2]:
print("Mean:", product_prices.mean())

Mean: 15559.6


In [3]:
print("Sum:", product_prices.sum())

Sum: 77798


In [4]:
print("Max:", product_prices.max())

Max: 52000


In [5]:
print("Min:", product_prices.min())

Min: 1900


In [6]:
print("Head (First 3 Elements):\n", product_prices.head(3))

Head (First 3 Elements):
 Wireless Earbuds     2999
Smartphone          15900
Laptop              52000
dtype: int64


In [7]:
print("Tail (Last 2 Elements):\n", product_prices.tail(2))

Tail (Last 2 Elements):
 Smartwatch    4999
Speaker       1900
dtype: int64


In [8]:
print("Apply (Adding 18 % of GST):\n", product_prices.apply(lambda x: x + (x * 0.18)))

Apply (Adding 18 % of GST):
 Wireless Earbuds     3538.82
Smartphone          18762.00
Laptop              61360.00
Smartwatch           5898.82
Speaker              2242.00
dtype: float64


In [9]:
print("Map (Formatting as currency):\n", product_prices.map("{:,.2f}".format))

Map (Formatting as currency):
 Wireless Earbuds     2,999.00
Smartphone          15,900.00
Laptop              52,000.00
Smartwatch           4,999.00
Speaker              1,900.00
dtype: object


In [10]:
print("Sort By Values (Ascending):\n", product_prices.sort_values())

Sort By Values (Ascending):
 Speaker              1900
Wireless Earbuds     2999
Smartwatch           4999
Smartphone          15900
Laptop              52000
dtype: int64


In [11]:
print("Sort by Index (Alphabetical):\n", product_prices.sort_index())

Sort by Index (Alphabetical):
 Laptop              52000
Smartphone          15900
Smartwatch           4999
Speaker              1900
Wireless Earbuds     2999
dtype: int64


In [12]:
print(product_prices.sort_index(ascending=False))

Wireless Earbuds     2999
Speaker              1900
Smartwatch           4999
Smartphone          15900
Laptop              52000
dtype: int64


In [13]:
print("Values Counts:\n", product_prices.value_counts())

Values Counts:
 2999     1
15900    1
52000    1
4999     1
1900     1
Name: count, dtype: int64


In [14]:
data = {
    "Product": ["Wireless Earbuds", "Smartphone", "Laptop", "Smartwatch", "Speaker"],
    "Brand": ["Boat", "Samsung", "Dell", "Apple", "JBL"],
    "Price": [2999, 15900, 52000, 4999, 1900],
    "Stock": [150, 80, 40, 60, 200],
    "BestSeller": [True, True, False, True, False]
}
DF = pd.DataFrame(data)
print(DF)

            Product    Brand  Price  Stock  BestSeller
0  Wireless Earbuds     Boat   2999    150        True
1        Smartphone  Samsung  15900     80        True
2            Laptop     Dell  52000     40       False
3        Smartwatch    Apple   4999     60        True
4           Speaker      JBL   1900    200       False


In [15]:
print("Shape of DataFrame:", DF.shape)

Shape of DataFrame: (5, 5)


In [16]:
print("Columns in DataFrame:", DF.columns)

Columns in DataFrame: Index(['Product', 'Brand', 'Price', 'Stock', 'BestSeller'], dtype='object')


In [17]:
print("Index of DataFrame:", DF.index)

Index of DataFrame: RangeIndex(start=0, stop=5, step=1)


In [18]:
print("DataFrame Info:")
print(DF.info())

DataFrame Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Product     5 non-null      object
 1   Brand       5 non-null      object
 2   Price       5 non-null      int64 
 3   Stock       5 non-null      int64 
 4   BestSeller  5 non-null      bool  
dtypes: bool(1), int64(2), object(2)
memory usage: 297.0+ bytes
None


In [19]:
print("Statisticial Summary:\n", DF.describe())

Statisticial Summary:
               Price       Stock
count      5.000000    5.000000
mean   15559.600000  106.000000
std    21118.109771   66.932802
min     1900.000000   40.000000
25%     2999.000000   60.000000
50%     4999.000000   80.000000
75%    15900.000000  150.000000
max    52000.000000  200.000000


In [20]:
print("Head First 3 Rows:\n", DF.head(3))

Head First 3 Rows:
             Product    Brand  Price  Stock  BestSeller
0  Wireless Earbuds     Boat   2999    150        True
1        Smartphone  Samsung  15900     80        True
2            Laptop     Dell  52000     40       False


In [21]:
print("Tail Last 2 Rows:\n", DF.tail(2))

Tail Last 2 Rows:
       Product  Brand  Price  Stock  BestSeller
3  Smartwatch  Apple   4999     60        True
4     Speaker    JBL   1900    200       False


In [22]:
print(DF)
print(DF.loc[1,"Product"])
print(DF.loc[4,"Product"])
print(DF.loc[4,"Stock"])

print(DF.iloc[4, 0])
print(DF.iloc[2, 1])
print(DF.iloc[3, 3])

            Product    Brand  Price  Stock  BestSeller
0  Wireless Earbuds     Boat   2999    150        True
1        Smartphone  Samsung  15900     80        True
2            Laptop     Dell  52000     40       False
3        Smartwatch    Apple   4999     60        True
4           Speaker      JBL   1900    200       False
Smartphone
Speaker
200
Speaker
Dell
60


In [23]:
print("Using iloc (selecting 4th row):")
print(DF.iloc[4])

Using iloc (selecting 4th row):
Product       Speaker
Brand             JBL
Price            1900
Stock             200
BestSeller      False
Name: 4, dtype: object


In [24]:
print(DF)
print("***********************************Using loc***********************************")
print(DF.loc[DF["BestSeller"] == True])
print("***********************************Using loc***********************************")
print(DF.loc[DF["Stock"] < 100])
print("***********************************Using loc***********************************")
print(DF.loc[DF["Price"] < 10000])

            Product    Brand  Price  Stock  BestSeller
0  Wireless Earbuds     Boat   2999    150        True
1        Smartphone  Samsung  15900     80        True
2            Laptop     Dell  52000     40       False
3        Smartwatch    Apple   4999     60        True
4           Speaker      JBL   1900    200       False
***********************************Using loc***********************************
            Product    Brand  Price  Stock  BestSeller
0  Wireless Earbuds     Boat   2999    150        True
1        Smartphone  Samsung  15900     80        True
3        Smartwatch    Apple   4999     60        True
***********************************Using loc***********************************
      Product    Brand  Price  Stock  BestSeller
1  Smartphone  Samsung  15900     80        True
2      Laptop     Dell  52000     40       False
3  Smartwatch    Apple   4999     60        True
***********************************Using loc***********************************
            Pr

In [25]:
print("Before DF:", DF)
DF_dropped = DF.drop(columns=["Stock"])
print("After DF_dropped:\n", DF_dropped)
print("After DF (original):", DF)

Before DF:             Product    Brand  Price  Stock  BestSeller
0  Wireless Earbuds     Boat   2999    150        True
1        Smartphone  Samsung  15900     80        True
2            Laptop     Dell  52000     40       False
3        Smartwatch    Apple   4999     60        True
4           Speaker      JBL   1900    200       False
After DF_dropped:
             Product    Brand  Price  BestSeller
0  Wireless Earbuds     Boat   2999        True
1        Smartphone  Samsung  15900        True
2            Laptop     Dell  52000       False
3        Smartwatch    Apple   4999        True
4           Speaker      JBL   1900       False
After DF (original):             Product    Brand  Price  Stock  BestSeller
0  Wireless Earbuds     Boat   2999    150        True
1        Smartphone  Samsung  15900     80        True
2            Laptop     Dell  52000     40       False
3        Smartwatch    Apple   4999     60        True
4           Speaker      JBL   1900    200       False


In [26]:
DF.rename(columns={"Cost": "Price", "Inventory": "Stock"}, inplace=True)
print("After Renaming Columns:\n", DF)

After Renaming Columns:
             Product    Brand  Price  Stock  BestSeller
0  Wireless Earbuds     Boat   2999    150        True
1        Smartphone  Samsung  15900     80        True
2            Laptop     Dell  52000     40       False
3        Smartwatch    Apple   4999     60        True
4           Speaker      JBL   1900    200       False


In [27]:
DF_grouped = DF.groupby("Brand").agg({'Price': 'mean', 'Stock': 'sum'})
print("Grouped DataFrame by Brand:\n", DF_grouped)
DF_grouped = DF.groupby("Brand").agg({'Price': ["mean", "max", "min"]})
print("Grouped DataFrame by Brand:\n", DF_grouped)

Grouped DataFrame by Brand:
            Price  Stock
Brand                  
Apple     4999.0     60
Boat      2999.0    150
Dell     52000.0     40
JBL       1900.0    200
Samsung  15900.0     80
Grouped DataFrame by Brand:
            Price              
            mean    max    min
Brand                         
Apple     4999.0   4999   4999
Boat      2999.0   2999   2999
Dell     52000.0  52000  52000
JBL       1900.0   1900   1900
Samsung  15900.0  15900  15900


In [28]:
data2 = {
    "Brand": ["Boat", "Samsung", "Dell", "Apple", "JBL"],
    "Rating": [4.2, 4.5, 4.0, 4.8, 4.1],
    "Discount": [10, 15, 5, 20, 8]
}
DF_ratings = pd.DataFrame(data2)
print(DF_ratings)

     Brand  Rating  Discount
0     Boat     4.2        10
1  Samsung     4.5        15
2     Dell     4.0         5
3    Apple     4.8        20
4      JBL     4.1         8


In [29]:
DF_Merged = pd.merge(DF, DF_ratings, on="Brand")
print("Merged DataFrame:\n", DF_Merged)

Merged DataFrame:
             Product    Brand  Price  Stock  BestSeller  Rating  Discount
0  Wireless Earbuds     Boat   2999    150        True     4.2        10
1        Smartphone  Samsung  15900     80        True     4.5        15
2            Laptop     Dell  52000     40       False     4.0         5
3        Smartwatch    Apple   4999     60        True     4.8        20
4           Speaker      JBL   1900    200       False     4.1         8


In [30]:
new_data = {
    "Product": ["Tablet", "Smartphone Case"],
    "Brand": ["Samsung", "Apple"],
    "Price": [25000, 1999],
    "Stock": [70, 120],
    "BestSeller": [True, False]
}
new_DF = pd.DataFrame(new_data)
DF_Updated = pd.concat([DF, new_DF], ignore_index=True)
print("Updated DataFrame with New Products:\n", DF_Updated)

Updated DataFrame with New Products:
             Product    Brand  Price  Stock  BestSeller
0  Wireless Earbuds     Boat   2999    150        True
1        Smartphone  Samsung  15900     80        True
2            Laptop     Dell  52000     40       False
3        Smartwatch    Apple   4999     60        True
4           Speaker      JBL   1900    200       False
5            Tablet  Samsung  25000     70        True
6   Smartphone Case    Apple   1999    120       False


In [31]:
print(DF["Product"])

0    Wireless Earbuds
1          Smartphone
2              Laptop
3          Smartwatch
4             Speaker
Name: Product, dtype: object


In [32]:
print(DF[["Brand", "Price"]])

     Brand  Price
0     Boat   2999
1  Samsung  15900
2     Dell  52000
3    Apple   4999
4      JBL   1900


In [34]:
DF["Price"] = DF["Price"].astype(float)
print(DF)

            Product    Brand    Price  Stock  BestSeller
0  Wireless Earbuds     Boat   2999.0    150        True
1        Smartphone  Samsung  15900.0     80        True
2            Laptop     Dell  52000.0     40       False
3        Smartwatch    Apple   4999.0     60        True
4           Speaker      JBL   1900.0    200       False


In [35]:
DF["Rank"] = DF["Price"].rank(ascending=True)
print(DF)
print(DF.sort_values(by="Rank", ascending=True))
print(DF.sort_values(by="Rank", ascending=False))

            Product    Brand    Price  Stock  BestSeller  Rank
0  Wireless Earbuds     Boat   2999.0    150        True   2.0
1        Smartphone  Samsung  15900.0     80        True   4.0
2            Laptop     Dell  52000.0     40       False   5.0
3        Smartwatch    Apple   4999.0     60        True   3.0
4           Speaker      JBL   1900.0    200       False   1.0
            Product    Brand    Price  Stock  BestSeller  Rank
4           Speaker      JBL   1900.0    200       False   1.0
0  Wireless Earbuds     Boat   2999.0    150        True   2.0
3        Smartwatch    Apple   4999.0     60        True   3.0
1        Smartphone  Samsung  15900.0     80        True   4.0
2            Laptop     Dell  52000.0     40       False   5.0
            Product    Brand    Price  Stock  BestSeller  Rank
2            Laptop     Dell  52000.0     40       False   5.0
1        Smartphone  Samsung  15900.0     80        True   4.0
3        Smartwatch    Apple   4999.0     60        Tru

In [36]:
import pandas as pd

DF1 = pd.DataFrame({
    "ID": [1, 2,],
    "Name": ["Alice", "Bob"],
    "Age": [25, 30]
})

DF2 = pd.DataFrame({
    "ID": [3, 4,],
    "Name": ["Charlie", "David"],
    "Age": [28, 35]
})

DF_Combined = pd.concat([DF1, DF2], axis = 0)
print("Combined DataFrame:\n", DF_Combined)

Combined DataFrame:
    ID     Name  Age
0   1    Alice   25
1   2      Bob   30
0   3  Charlie   28
1   4    David   35


In [37]:
DF_Combined = pd.concat([DF1, DF2], axis = 1)
print("Combined DataFrame:\n", DF_Combined)

Combined DataFrame:
    ID   Name  Age  ID     Name  Age
0   1  Alice   25   3  Charlie   28
1   2    Bob   30   4    David   35


In [39]:
print(DF)
DF.pivot_table(index="Brand", values="Price", columns="Stock", aggfunc="max")

            Product    Brand    Price  Stock  BestSeller  Rank
0  Wireless Earbuds     Boat   2999.0    150        True   2.0
1        Smartphone  Samsung  15900.0     80        True   4.0
2            Laptop     Dell  52000.0     40       False   5.0
3        Smartwatch    Apple   4999.0     60        True   3.0
4           Speaker      JBL   1900.0    200       False   1.0


Stock,40,60,80,150,200
Brand,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Apple,,4999.0,,,
Boat,,,,2999.0,
Dell,52000.0,,,,
JBL,,,,,1900.0
Samsung,,,15900.0,,


In [40]:
pd.crosstab(DF["Brand"], DF["Stock"])

Stock,40,60,80,150,200
Brand,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Apple,0,1,0,0,0
Boat,0,0,0,1,0
Dell,1,0,0,0,0
JBL,0,0,0,0,1
Samsung,0,0,1,0,0


In [41]:
import pandas as pd

data = {
    "Purchase Date": ["2023-01-15", "2023-02-20", "2023-03-10", "2023-01-25", "2023-02-14"],
    "Product": ["Wireless Earbuds", "Smartphone", "Laptop", "Smartwatch", "Speaker"],
    "Brand": ["Boat", "Samsung", "Dell", "Apple", "JBL"],
    "Price": [2999, 15900, 52000, 4999, 1900],
    "Quantity": [2, 1, 3, 2, 4]
}

df_sales = pd.DataFrame(data)
print(df_sales)

  Purchase Date           Product    Brand  Price  Quantity
0    2023-01-15  Wireless Earbuds     Boat   2999         2
1    2023-02-20        Smartphone  Samsung  15900         1
2    2023-03-10            Laptop     Dell  52000         3
3    2023-01-25        Smartwatch    Apple   4999         2
4    2023-02-14           Speaker      JBL   1900         4


In [44]:
if 'Purchase Date' in df_sales.columns:
	df_sales['Purchase Date'] = pd.to_datetime(df_sales['Purchase Date'])
	df_sales.set_index('Purchase Date', inplace=True)
print(df_sales)

                        Product    Brand  Price  Quantity
Purchase Date                                            
2023-01-15     Wireless Earbuds     Boat   2999         2
2023-02-20           Smartphone  Samsung  15900         1
2023-03-10               Laptop     Dell  52000         3
2023-01-25           Smartwatch    Apple   4999         2
2023-02-14              Speaker      JBL   1900         4


In [45]:
daily_avg = df_sales.resample('D').mean(numeric_only=True)
print("\n--- Daily Average Sales ---\n")
print(daily_avg)

monthly_avg = df_sales.resample('M').mean(numeric_only=True)
print("\n--- Monthly Average Sales ---\n")
print(monthly_avg)

quaterly_avg = df_sales.resample('Q').mean(numeric_only=True)
print("\n--- Quaterly Average Sales ---\n")
print(quaterly_avg)

yearly_avg = df_sales.resample('Y').mean(numeric_only=True)
print("\n--- Yearly Average Sales ---\n")
print(yearly_avg)


--- Daily Average Sales ---

                 Price  Quantity
Purchase Date                   
2023-01-15      2999.0       2.0
2023-01-16         NaN       NaN
2023-01-17         NaN       NaN
2023-01-18         NaN       NaN
2023-01-19         NaN       NaN
2023-01-20         NaN       NaN
2023-01-21         NaN       NaN
2023-01-22         NaN       NaN
2023-01-23         NaN       NaN
2023-01-24         NaN       NaN
2023-01-25      4999.0       2.0
2023-01-26         NaN       NaN
2023-01-27         NaN       NaN
2023-01-28         NaN       NaN
2023-01-29         NaN       NaN
2023-01-30         NaN       NaN
2023-01-31         NaN       NaN
2023-02-01         NaN       NaN
2023-02-02         NaN       NaN
2023-02-03         NaN       NaN
2023-02-04         NaN       NaN
2023-02-05         NaN       NaN
2023-02-06         NaN       NaN
2023-02-07         NaN       NaN
2023-02-08         NaN       NaN
2023-02-09         NaN       NaN
2023-02-10         NaN       NaN
2023-02-11   

  monthly_avg = df_sales.resample('M').mean(numeric_only=True)
  quaterly_avg = df_sales.resample('Q').mean(numeric_only=True)
  yearly_avg = df_sales.resample('Y').mean(numeric_only=True)


In [47]:
import pandas as pd
import numpy as np

data = {
    'A': [1, 2, np.nan, 4, 5],
    'B': [np.nan, 2, 3, np.nan, 5],
    'C': [10, np.nan, np.nan, 40, 50]
}

df = pd.DataFrame(data)
print("Original DataFrame With Missing Values:\n", df)
print(df)
    

Original DataFrame With Missing Values:
      A    B     C
0  1.0  NaN  10.0
1  2.0  2.0   NaN
2  NaN  3.0   NaN
3  4.0  NaN  40.0
4  5.0  5.0  50.0
     A    B     C
0  1.0  NaN  10.0
1  2.0  2.0   NaN
2  NaN  3.0   NaN
3  4.0  NaN  40.0
4  5.0  5.0  50.0


In [48]:
df_dropna = df.dropna()
print("\nDataFrame After Dropping Rows with Missing Values:\n", df_dropna)


DataFrame After Dropping Rows with Missing Values:
      A    B     C
4  5.0  5.0  50.0


In [49]:
df_fillna = df.fillna(0)
print("\nDataFrame After Filling Missing Values with 0:\n", df_fillna)


DataFrame After Filling Missing Values with 0:
      A    B     C
0  1.0  0.0  10.0
1  2.0  2.0   0.0
2  0.0  3.0   0.0
3  4.0  0.0  40.0
4  5.0  5.0  50.0


In [50]:
df_interpolate = df.interpolate()
print("\nDataFrame After Interpolating Missing Values:\n", df_interpolate)


DataFrame After Interpolating Missing Values:
      A    B     C
0  1.0  NaN  10.0
1  2.0  2.0  20.0
2  3.0  3.0  30.0
3  4.0  4.0  40.0
4  5.0  5.0  50.0
