In [1]:
import pandas as pd

In [2]:
## Pandas Series Creation
# A Series built from a plain list gets the default integer index 0..n-1.
data = [10, 20, 30, 40, 50]
series = pd.Series(data=data)
# One print call: header and Series are separated by a newline.
print("Pandas Series:", series, sep="\n")

Pandas Series:
0    10
1    20
2    30
3    40
4    50
dtype: int64


In [3]:
## Create a Series from a dictionary
# Dictionary keys become the index labels; dictionary values become the data.
data_dict = dict(a=1, b=2, c=3)
series_dict = pd.Series(data=data_dict)
print("\nPandas Series from dictionary:", series_dict, sep="\n")


Pandas Series from dictionary:
a    1
b    2
c    3
dtype: int64


In [4]:
## Series Creation with a Custom Index
# Instead of the default 0..n-1 positions, each value is paired with an
# explicit label supplied through the `index` argument.
data = [10, 20, 30, 40, 50]
index_labels = list('abcde')
series_with_index = pd.Series(data=data, index=index_labels)
print("\nPandas Series with custom index:", series_with_index, sep="\n")


Pandas Series with custom index:
a    10
b    20
c    30
d    40
e    50
dtype: int64


In [5]:
## DataFrame Creation
# From a dictionary of lists: each key becomes a column name and each list
# supplies that column's values, one entry per row.
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David'],
    Age=[24, 27, 22, 32],
    City=['New York', 'Los Angeles', 'Chicago', 'Houston'],
)
df = pd.DataFrame(data=data)
print("\nPandas DataFrame:", df, sep="\n")


Pandas DataFrame:
      Name  Age         City
0    Alice   24     New York
1      Bob   27  Los Angeles
2  Charlie   22      Chicago
3    David   32      Houston


In [6]:
## Creating a DataFrame from a list of dictionaries
# Each dictionary is one row; its keys name the columns.
data_list_of_dicts = [
    dict(Name='Alice', Age=24, City='New York'),
    dict(Name='Bob', Age=27, City='Los Angeles'),
    dict(Name='Charlie', Age=22, City='Chicago'),
    dict(Name='David', Age=32, City='Houston'),
]
df_list_of_dicts = pd.DataFrame(data=data_list_of_dicts)
print("\nPandas DataFrame from list of dictionaries:", df_list_of_dicts, sep="\n")


Pandas DataFrame from list of dictionaries:
      Name  Age         City
0    Alice   24     New York
1      Bob   27  Los Angeles
2  Charlie   22      Chicago
3    David   32      Houston


In [7]:
# Load the sales data from a CSV file (the path is relative to the current
# working directory) and preview the first five rows.
df_csv = pd.read_csv(filepath_or_buffer='Online Sales Data.csv')
print(df_csv.head(n=5))

   Transaction ID        Date Product Category             Product Name  \
0           10001  2024-01-01      Electronics            iPhone 14 Pro   
1           10002  2024-01-02  Home Appliances         Dyson V11 Vacuum   
2           10003  2024-01-03         Clothing         Levi's 501 Jeans   
3           10004  2024-01-04            Books        The Da Vinci Code   
4           10005  2024-01-05  Beauty Products  Neutrogena Skincare Set   

   Units Sold  Unit Price  Total Revenue         Region Payment Method  
0           2      999.99        1999.98  North America    Credit Card  
1           1      499.99         499.99         Europe         PayPal  
2           3       69.99         209.97           Asia     Debit Card  
3           4       15.99          63.96  North America    Credit Card  
4           1       89.99          89.99         Europe         PayPal  


In [8]:
# List the column labels of the loaded sales data.
column_labels = df_csv.columns
print(column_labels)

Index(['Transaction ID', 'Date', 'Product Category', 'Product Name',
       'Units Sold', 'Unit Price', 'Total Revenue', 'Region',
       'Payment Method'],
      dtype='object')


In [9]:
# Select a column by name. Passing a *list* of names yields a one-column
# DataFrame rather than a Series.
print(df_csv.loc[:, ['Transaction ID']])

     Transaction ID
0             10001
1             10002
2             10003
3             10004
4             10005
..              ...
235           10236
236           10237
237           10238
238           10239
239           10240

[240 rows x 1 columns]


In [10]:
## iloc and loc usage
# iloc indexes by integer position; loc indexes by label.
# The row examples below assume the default integer index, where row labels
# and row positions coincide.

# Single row
print(df_csv.iloc[0])  # first row, by position
print(df_csv.loc[0])   # first row, by label

# Row slices: loc includes the end label, iloc excludes the end position,
# so both calls show the first three rows.
print(df_csv.loc[0:2])
print(df_csv.iloc[0:3])

# Column selection
id_and_name = ['Transaction ID', 'Product Name']
print(df_csv.loc[:, id_and_name])  # columns chosen by name
print(df_csv.iloc[:, [0, 1]])      # columns chosen by position (the first two)

# Conditional filtering with a boolean mask
more_than_five_units = df_csv['Units Sold'] > 5
print(df_csv[more_than_five_units])  # all columns of the matching rows
print(df_csv.loc[more_than_five_units, ['Transaction ID', 'Units Sold']])  # matching rows, two columns only

Transaction ID              10001
Date                   2024-01-01
Product Category      Electronics
Product Name        iPhone 14 Pro
Units Sold                      2
Unit Price                 999.99
Total Revenue             1999.98
Region              North America
Payment Method        Credit Card
Name: 0, dtype: object
Transaction ID              10001
Date                   2024-01-01
Product Category      Electronics
Product Name        iPhone 14 Pro
Units Sold                      2
Unit Price                 999.99
Total Revenue             1999.98
Region              North America
Payment Method        Credit Card
Name: 0, dtype: object
   Transaction ID        Date Product Category      Product Name  Units Sold  \
0           10001  2024-01-01      Electronics     iPhone 14 Pro           2   
1           10002  2024-01-02  Home Appliances  Dyson V11 Vacuum           1   
2           10003  2024-01-03         Clothing  Levi's 501 Jeans           3   

   Unit Price  Total 

In [11]:
## Accessing a specific element
# at  -> single value by (row label, column label)
# iat -> single value by (row position, column position)
first_id_by_label = df_csv.at[0, 'Transaction ID']
first_id_by_position = df_csv.iat[0, 0]
print(first_id_by_label, first_id_by_position, sep="\n")

10001
10001


In [12]:
## Pandas Attributes and Methods
# A tour of commonly used DataFrame attributes and methods on the sales data.
# Note: this cell adds two columns to df_csv ('Total Sales', 'Discounted Price')
# and writes a new CSV file at the end.

# --- Inspection -------------------------------------------------------------
# Every attribute and method name available on the DataFrame
print(dir(df_csv))
# (rows, columns) tuple
print(df_csv.shape)
# Data type of each column
print(df_csv.dtypes)
# Summary statistics of the DataFrame
print(df_csv.describe())
# Column names, non-null counts and dtypes.
# info() prints its report itself and returns None, so it is NOT wrapped in
# print() -- doing so would emit a stray "None" line after the report.
df_csv.info()
# First few rows
print(df_csv.head())
# Last few rows
print(df_csv.tail())
# Row index
print(df_csv.index)
# Column labels
print(df_csv.columns)
# Underlying values as an array
print(df_csv.values)
# Transposed view (rows <-> columns)
print(df_csv.T)

# --- Sorting and missing values ----------------------------------------------
# Sort rows by a specific column (returns a new frame; df_csv is unchanged)
print(df_csv.sort_values(by='Units Sold'))
# Count of null values per column
print(df_csv.isnull().sum())
# Copy with rows containing null values dropped
print(df_csv.dropna())
# Copy with null values replaced by 0
print(df_csv.fillna(0))

# --- Columns -----------------------------------------------------------------
# Copy with a column renamed
print(df_csv.rename(columns={'Transaction ID': 'Trans_ID'}))
# Add a new column computed from two existing ones (modifies df_csv)
df_csv['Total Sales'] = df_csv['Units Sold'] * df_csv['Unit Price']
print(df_csv.head())

# --- Grouping ----------------------------------------------------------------
# Sum of 'Total Sales' per product
grouped = df_csv.groupby('Product Name')['Total Sales'].sum()
print(grouped)

# --- Combining DataFrames ----------------------------------------------------
df1 = pd.DataFrame({'key': ['A', 'B', 'C'], 'value1': [1, 2, 3]})
df2 = pd.DataFrame({'key': ['A', 'B', 'D'], 'value2': [4, 5, 6]})
# Inner merge: keeps only the keys present in both frames
merged = pd.merge(df1, df2, on='key', how='inner')
print(merged)
# Concatenation: stacks the rows of both frames and renumbers the index
concatenated = pd.concat([df1, df2], ignore_index=True)
print(concatenated)

# --- Reshaping ---------------------------------------------------------------
# Pivot table: 'Total Sales' summed per product
pivoted = df_csv.pivot_table(index='Product Name', values='Total Sales', aggfunc='sum')
print(pivoted)
# Melt: wide -> long, one row per (Transaction ID, variable) pair
melted = pd.melt(df_csv, id_vars=['Transaction ID'], value_vars=['Units Sold', 'Unit Price'])
print(melted)

# --- Sampling ----------------------------------------------------------------
# Five random rows. random_state fixes the seed so the same rows are drawn on
# every run (Restart & Run All stays reproducible).
sampled = df_csv.sample(n=5, random_state=42)
print(sampled)

# --- Derived column ----------------------------------------------------------
# 10% discount on the unit price. Vectorized multiplication gives the same
# values as an element-wise apply(lambda x: x * 0.9) without the Python loop.
df_csv['Discounted Price'] = df_csv['Unit Price'] * 0.9
print(df_csv.head())

# --- Export ------------------------------------------------------------------
# Save the modified DataFrame to a new CSV file, without the row index
df_csv.to_csv('Modified_Online_Sales_Data.csv', index=False)


['Date', 'Region', 'T', '_AXIS_LEN', '_AXIS_ORDERS', '_AXIS_TO_AXIS_NUMBER', '_HANDLED_TYPES', '__abs__', '__add__', '__and__', '__annotations__', '__array__', '__array_priority__', '__array_ufunc__', '__arrow_c_stream__', '__bool__', '__class__', '__contains__', '__copy__', '__dataframe__', '__dataframe_consortium_standard__', '__deepcopy__', '__delattr__', '__delitem__', '__dict__', '__dir__', '__divmod__', '__doc__', '__eq__', '__finalize__', '__firstlineno__', '__floordiv__', '__format__', '__ge__', '__getattr__', '__getattribute__', '__getitem__', '__getstate__', '__gt__', '__hash__', '__iadd__', '__iand__', '__ifloordiv__', '__imod__', '__imul__', '__init__', '__init_subclass__', '__invert__', '__ior__', '__ipow__', '__isub__', '__iter__', '__itruediv__', '__ixor__', '__le__', '__len__', '__lt__', '__matmul__', '__mod__', '__module__', '__mul__', '__ne__', '__neg__', '__new__', '__nonzero__', '__or__', '__pandas_priority__', '__pos__', '__pow__', '__radd__', '__rand__', '__rdivmo