# In [1]:
import pandas as pd

# Q1. List any five functions of the pandas library with execution.

# Example functions:
def example_functions(df):
    """
    Print the results of five common pandas operations on a DataFrame.

    The operations shown are head, tail, describe, the mean of column 'A',
    and a query for rows where 'A' is greater than 5.

    Args:
        df (pd.DataFrame): The DataFrame to inspect; must contain a column 'A'.
    """
    # Each entry pairs a label with a zero-argument callable so that every
    # result is computed immediately before it is printed, one after another.
    demonstrations = (
        ("Head:", df.head),
        ("Tail:", df.tail),
        ("Description:", df.describe),
        ("Mean of column 'A':", lambda: df['A'].mean()),
        ("Rows where A > 5:", lambda: df.query('A > 5')),
    )
    for label, compute in demonstrations:
        print(label, compute())

# Q2. Given a Pandas DataFrame df with columns 'A', 'B', and 'C', write a Python function to re-index the
# DataFrame with a new index that starts from 1 and increments by 2 for each row.

def reindex_dataframe(df):
    """
    Re-indexes a DataFrame with a new index starting from 1 and incrementing by 2.

    The row labels become 1, 3, 5, ... with exactly one label per row. The
    input DataFrame is left untouched; a re-labelled copy is returned.

    Args:
        df (pd.DataFrame): The DataFrame to re-index.

    Returns:
        pd.DataFrame: A copy of ``df`` whose index is 1, 3, 5, ...
    """
    row_count = len(df)
    # The stop value must be 2 * row_count + 1 so that the step-2 range
    # produces one label per row (a stop of row_count + 1 yields only half).
    new_index = pd.RangeIndex(start=1, stop=2 * row_count + 1, step=2)
    reindexed = df.copy()
    # Assign the labels directly: set_index() would interpret its argument
    # as column keys rather than as the new index values.
    reindexed.index = new_index
    return reindexed

# Q3. You have a Pandas DataFrame df with a column named 'Values'. Write a Python function that
# iterates over the DataFrame and calculates the sum of the first three values in the 'Values' column. The
# function should print the sum to the console.

def calculate_sum(df):
    """
    Print the sum of the first three entries of the 'Values' column.

    Args:
        df (pd.DataFrame): The DataFrame containing the 'Values' column.
    """
    leading_values = df['Values'].head(3)
    total = leading_values.sum()
    print("Sum of first three values:", total)

# Q4. Given a Pandas DataFrame df with a column 'Text', write a Python function to create a new column
# 'Word_Count' that contains the number of words in each row of the 'Text' column.

def count_words(df):
    """
    Creates a new column 'Word_Count' with the number of words in each row of the 'Text' column.

    Words are the whitespace-separated tokens produced by ``str.split()``.
    Entries that are not strings (for example missing values such as
    None/NaN) are counted as 0 words instead of raising an error.

    The DataFrame is modified in place; nothing is returned.

    Args:
        df (pd.DataFrame): The DataFrame containing the 'Text' column.
    """
    def _word_count(text):
        # Missing values reach this function as None/NaN, which have no split().
        if not isinstance(text, str):
            return 0
        return len(text.split())

    df['Word_Count'] = df['Text'].apply(_word_count)

# Q5. How are DataFrame.size() and DataFrame.shape() different?

def size_vs_shape(df):
    """
    Print a DataFrame's ``size`` and ``shape`` side by side to contrast them.

    Both values are read as attributes of the DataFrame (``df.size`` and
    ``df.shape``), not called as methods.

    Args:
        df (pd.DataFrame): The DataFrame whose size and shape are printed.
    """
    element_count = df.size
    dimensions = df.shape
    print("DataFrame.size() returns the total number of elements:", element_count)
    print("DataFrame.shape() returns a tuple of (number of rows, number of columns):", dimensions)

# Q6. Which function of pandas do we use to read an excel file?

def read_excel(filename):
    """
    Load an Excel file by delegating to ``pd.read_excel``.

    The file is read with pandas' default options; no extra arguments are
    passed through.

    Args:
        filename (str): The path to the Excel file.

    Returns:
        pd.DataFrame: The data read from the Excel file.
    """
    loaded_frame = pd.read_excel(filename)
    return loaded_frame

# Q7. You have a Pandas DataFrame df that contains a column named 'Email' that contains email
# addresses in the format 'username@domain.com'. Write a Python function that creates a new column
# 'Username' in df that contains only the username part of each email address.

def extract_username(df):
    """
    Add a 'Username' column holding the part of each email before the '@'.

    The DataFrame is modified in place; nothing is returned.

    Args:
        df (pd.DataFrame): The DataFrame containing the 'Email' column.
    """
    email_parts = df['Email'].str.split('@')
    # The first piece of each split is everything before the first '@'.
    df['Username'] = email_parts.str.get(0)

# Q8. You have a Pandas DataFrame df with columns 'A', 'B', and 'C'. Write a Python function that selects
# all rows where the value in column 'A' is greater than 5 and the value in column 'B' is less than 10. The
# function should return a new DataFrame that contains only the selected rows.

def filter_dataframe(df):
    """
    Return the rows of a DataFrame where column 'A' exceeds 5 and column 'B' is below 10.

    Args:
        df (pd.DataFrame): The DataFrame to filter; needs columns 'A' and 'B'.

    Returns:
        pd.DataFrame: A new DataFrame containing only the matching rows.
    """
    condition = 'A > 5 and B < 10'
    matching_rows = df.query(condition)
    return matching_rows

# Q9. Given a Pandas DataFrame df with a column 'Values', write a Python function to calculate the mean,
# median, and standard deviation of the values in the 'Values' column.

def calculate_statistics(df):
    """
    Print the mean, median, and standard deviation of the 'Values' column.

    Each statistic is computed with the corresponding pandas Series method
    using its default settings.

    Args:
        df (pd.DataFrame): The DataFrame containing the 'Values' column.
    """
    values = df['Values']
    statistics = (
        ("Mean:", values.mean),
        ("Median:", values.median),
        ("Standard deviation:", values.std),
    )
    for label, compute in statistics:
        print(label, compute())

# Q10. Given a Pandas DataFrame df with a column 'Sales' and a column 'Date', write a Python function to
# create a new column 'MovingAverage' that contains the moving average of the sales for the past 7 days
# for each row in the DataFrame. The moving average should be calculated using a window of size 7 and
# should include the current day.

def calculate_moving_average(df, window_size=7):
    """
    Creates a new column 'MovingAverage' with the moving average of sales.

    Each value is the mean of the current row's 'Sales' and the preceding
    ``window_size - 1`` rows, so the current row is always included. Rows that
    do not yet have a full window behind them (the first ``window_size - 1``
    rows) receive NaN.

    The window is counted in rows, not calendar days: the 'Date' column is
    not consulted, so ``df`` should already be ordered by date with one row
    per day for the result to be a true "past N days" average.

    The DataFrame is modified in place and also returned.

    Args:
        df (pd.DataFrame): The DataFrame containing 'Sales' and 'Date' columns.
        window_size (int): The number of rows in each window. Defaults to 7,
            matching the 7-day moving average this function is meant for.

    Returns:
        pd.DataFrame: The same DataFrame with the added 'MovingAverage' column.
    """
    rolling_sales = df['Sales'].rolling(window=window_size)
    df['MovingAverage'] = rolling_sales.mean()
    return df

# Q11. You have a Pandas DataFrame df with a column 'Date'. Write a Python function that creates a new
# column 'Weekday' in the DataFrame. The 'Weekday' column should contain the weekday name (e.g.
# Monday, Tuesday) corresponding to each date in the 'Date' column.

def get_weekday(df):
    """
    Creates a new column 'Weekday' containing the weekday name for each date.

    The 'Date' column may already be datetime-typed or may hold date strings
    (as is common after reading a CSV); it is converted with
    ``pd.to_datetime`` only for the purpose of computing the weekday and is
    itself left unchanged. Names are produced by ``Series.dt.day_name()``,
    which returns English names (e.g. 'Monday') regardless of the process
    locale.

    The DataFrame is modified in place and also returned.

    Args:
        df (pd.DataFrame): The DataFrame containing the 'Date' column.

    Returns:
        pd.DataFrame: The same DataFrame with the added 'Weekday' column.
    """
    # Convert first: the .dt accessor is unavailable on string/object columns.
    dates = pd.to_datetime(df['Date'])
    df['Weekday'] = dates.dt.day_name()
    return df

# Q12. Given a Pandas DataFrame df with a column 'Date' that contains timestamps, write a Python
# function to select all rows where the date is between '2023-01-01' and '2023-01-31'.

def filter_by_date_range(df, start_date, end_date):
    """
    Return the rows whose 'Date' lies between two bounds, both inclusive.

    NOTE(review): if 'Date' holds timestamps with a time-of-day component, a
    date-only ``end_date`` is presumably compared as midnight of that day, so
    later rows on the end day would be left out. Confirm whether callers
    expect the whole end day to be included.

    Args:
        df (pd.DataFrame): The DataFrame containing the 'Date' column.
        start_date (str): The start date in YYYY-MM-DD format.
        end_date (str): The end date in YYYY-MM-DD format.

    Returns:
        pd.DataFrame: The rows of ``df`` that fall within the range.
    """
    in_range = df['Date'].between(start_date, end_date)
    return df[in_range]

# Q13. To use the basic functions of pandas, what is the first and foremost necessary library that needs to
# be imported?

# The `pandas` library itself must be imported first (conventionally `import pandas as pd`)
# before any of its functions can be used.