# In [None]:
# Q1: List any five functions of the pandas library with execution.

import pandas as pd

# 1. pd.DataFrame() - Builds a DataFrame from in-memory data
# (pd.read_csv('file.csv') would load a table from a CSV file instead; a dict
# is used here so the demonstration runs without any external file.)

# Sample data for demonstration
data = {
    'Name': ['Alice', 'Bob', 'Claire'],
    'Age': [25, 30, 27],
    'Gender': ['Female', 'Male', 'Female'],
}

# Create DataFrame
df = pd.DataFrame(data)


# 2. df.head() - Shows the first 5 rows of the DataFrame
print(df.head())

# 3. df.describe() - Returns summary statistics of DataFrame
print(df.describe())

# 4. df.drop() - Drops specified labels from the DataFrame
# axis=1 drops a column; the result is a new frame and df keeps its 'Age' column.
df_dropped = df.drop('Age', axis=1)
print(df_dropped)

# 5. df.groupby() - Groups the DataFrame using a column
# Group on 'Gender', a column that actually exists in df, and average only the
# numeric 'Age' column ('Name' holds text and cannot be averaged).
grouped_df = df.groupby('Gender')[['Age']].mean()
print(grouped_df)


# Q2: Re-index the DataFrame with a new index that starts from 1 and increments by 2 for each row.

def reindex_dataframe(df):
    """Relabel the rows of ``df`` with 1, 3, 5, ... and return it.

    The index is replaced on the frame that was passed in (no copy is made),
    so the returned object is the very same DataFrame as the argument.
    """
    first_label, step = 1, 2
    # One odd label per row: the stop value step * len(df) lies just past the last label.
    odd_labels = range(first_label, step * len(df), step)
    df.index = odd_labels
    return df

# Demo: a three-row frame, so three new row labels are assigned
df2 = pd.DataFrame(
    {
        'A': [1, 2, 3],
        'B': [4, 5, 6],
        'C': [7, 8, 9],
    }
)
df2_reindexed = reindex_dataframe(df2)
print(df2_reindexed)


# Q3: Sum of first three values in the 'Values' column

def sum_first_three(df):
    """Print the sum of the first three entries of the 'Values' column.

    When the column holds fewer than three rows, the rows that exist are
    summed. The result is only printed; nothing is returned.
    """
    leading = df['Values'].iloc[:3]  # positional slice: at most the first three rows
    total = leading.sum()
    print(f"Sum of first three values: {total}")

# Demo: five values, 10 through 50 in steps of 10
df3 = pd.DataFrame({'Values': list(range(10, 60, 10))})
sum_first_three(df3)


# Q4: Create a new column 'Word_Count' based on 'Text' column

def word_count(df):
    """Add a 'Word_Count' column with the number of words in each 'Text' entry.

    Words are whitespace-separated tokens. Non-string values are converted
    with ``str()`` before counting. Missing values (None/NaN) count as 0
    words; previously they were stringified to 'None'/'nan' and counted as
    one word.

    The column is added to ``df`` itself and the same DataFrame is returned.
    """
    # na_action='ignore' keeps missing entries as NaN instead of handing them to the lambda.
    counts = df['Text'].map(lambda text: len(str(text).split()), na_action='ignore')
    # Missing entries become 0; cast back to integers because NaN forces a float column.
    df['Word_Count'] = counts.fillna(0).astype('int64')
    return df

# Demo: three short texts to count words in
df4 = pd.DataFrame(
    {'Text': ['This is a sentence', 'Another one', 'Hello world!']}
)
df4_with_word_count = word_count(df4)
print(df4_with_word_count)


# Q5: Difference between DataFrame.size and DataFrame.shape (both are attributes, not methods)

# DataFrame.size returns the total number of elements in the DataFrame (rows * columns)
# DataFrame.shape returns the dimensions of the DataFrame as a tuple (rows, columns)

# Both values are read as attributes (no call parentheses) from df4 as it
# stands at this point in the script.
print(f"Size: {df4.size}", f"Shape: {df4.shape}", sep="\n")


# Q6: Function to read an Excel file
# pd.read_excel() - Reads an Excel file

# pd.read_excel() needs the workbook to exist on disk and an Excel engine
# (such as openpyxl) to be installed. Guard the call so a missing file or
# engine is reported instead of aborting every section that follows.
try:
    df_excel = pd.read_excel('sample.xlsx')
except (FileNotFoundError, ImportError) as exc:
    df_excel = None  # nothing could be loaded; later code can test for None
    print(f"Could not read 'sample.xlsx': {exc}")
else:
    print(df_excel)


# Q7: Create a new 'Username' column from email addresses

def extract_username(df):
    """Add a 'Username' column holding the part of each 'Email' before the first '@'.

    An address without '@' is kept whole. Missing emails (None/NaN) yield a
    missing username instead of raising AttributeError, because the
    vectorized ``.str`` accessor propagates missing values.

    The column is added to ``df`` itself and the same DataFrame is returned.
    """
    # n=1: only the first '@' matters, so stop splitting after it.
    df['Username'] = df['Email'].str.split('@', n=1).str[0]
    return df

# Demo: two well-formed e-mail addresses
df5 = pd.DataFrame(
    {
        'Email': [
            'john.doe@example.com',
            'alice.smith@domain.com',
        ]
    }
)
df5_with_username = extract_username(df5)
print(df5_with_username)


# Q8: Select rows where 'A' > 5 and 'B' < 10

def filter_rows(df):
    """Return the rows of ``df`` where 'A' is greater than 5 and 'B' is less than 10.

    Both comparisons are strict; the original row labels are preserved.
    """
    a_above_5 = df['A'].gt(5)
    b_below_10 = df['B'].lt(10)
    return df.loc[a_above_5 & b_below_10]

# Demo: five rows, some of which satisfy both conditions
df6 = pd.DataFrame(
    {
        'A': [3, 8, 6, 2, 9],
        'B': [5, 2, 9, 3, 1],
        'C': [1, 7, 4, 5, 2],
    }
)
filtered_df = filter_rows(df6)
print(filtered_df)


# Q9: Calculate mean, median, and standard deviation for the 'Values' column

def calculate_statistics(df):
    """Print the mean, median and standard deviation of the 'Values' column.

    The three figures are written on a single line; nothing is returned.
    """
    column = df['Values']
    avg, mid, spread = column.mean(), column.median(), column.std()
    print(f"Mean: {avg}, Median: {mid}, Std: {spread}")

# Demo: five values, 10 through 50 in steps of 10
df7 = pd.DataFrame({'Values': list(range(10, 60, 10))})
calculate_statistics(df7)


# Q10: Create a 'MovingAverage' column for sales

def moving_average(df, window=7):
    """Add a 'MovingAverage' column: the rolling mean of 'Sales'.

    Args:
        df: DataFrame with a numeric 'Sales' column.
        window: Number of consecutive rows (the current row included) that
            each average covers. Defaults to 7, the value that used to be
            hard-coded.

    Returns:
        The same DataFrame, with the new column added to it.

    The first ``window - 1`` rows have no full window yet and are NaN.
    """
    df['MovingAverage'] = df['Sales'].rolling(window=window).mean()
    return df

# Demo: nine sales figures, 100 through 900 in steps of 100
df8 = pd.DataFrame({'Sales': list(range(100, 1000, 100))})
df8_with_ma = moving_average(df8)
print(df8_with_ma)


# Q11: Create a new 'Weekday' column from the 'Date' column

def extract_weekday(df):
    """Add a 'Weekday' column with the day name of each 'Date' entry.

    The 'Date' values are parsed only for this computation; the 'Date' column
    itself is left as it was. The new column is added to ``df`` itself and the
    same DataFrame is returned.
    """
    parsed_dates = pd.to_datetime(df['Date'])
    df['Weekday'] = parsed_dates.dt.day_name()
    return df

# Demo: five consecutive dates given as ISO-formatted strings
df9 = pd.DataFrame(
    {
        'Date': [
            '2023-01-01',
            '2023-01-02',
            '2023-01-03',
            '2023-01-04',
            '2023-01-05',
        ]
    }
)
df9_with_weekday = extract_weekday(df9)
print(df9_with_weekday)


# Q12: Select rows where 'Date' is between '2023-01-01' and '2023-01-31'

def select_date_range(df, start='2023-01-01', end='2023-01-31'):
    """Return the rows of ``df`` whose 'Date' lies between ``start`` and ``end``.

    Args:
        df: DataFrame with a 'Date' column that ``pd.to_datetime`` can parse.
        start: Inclusive lower bound. Defaults to '2023-01-01'.
        end: Inclusive upper bound, taken as a calendar day: rows time-stamped
            at any time on that day are kept. Defaults to '2023-01-31'.

    Returns:
        The matching rows, with their original row labels.

    Side effect: the 'Date' column of ``df`` itself is converted to datetime.
    """
    df['Date'] = pd.to_datetime(df['Date'])
    on_or_after_start = df['Date'] >= start
    # Compare the end bound per calendar day. A plain `<= end` measures against
    # midnight and would drop rows stamped later on the final day.
    on_or_before_end = df['Date'].dt.normalize() <= end
    return df[on_or_after_start & on_or_before_end]

# Demo: three dates given as ISO-formatted strings
df10 = pd.DataFrame(
    {
        'Date': [
            '2023-01-01',
            '2023-01-15',
            '2023-02-01',
        ]
    }
)
df10_selected = select_date_range(df10)
print(df10_selected)


# Q13: The first necessary library to import for using pandas
import pandas as pd
