In [1]:
#Q1. List any five functions of the pandas library with execution
import pandas as pd

# Sample data reused by the demonstrations below (three integer columns, four rows)
data = {
    'A': [1, 2, 3, 4],
    'B': [5, 6, 7, 8],
    'C': [9, 10, 11, 12]
}
df = pd.DataFrame(data)

# 1. head() - show only the first two rows
print("head():", df.head(n=2), sep="\n")

# 2. describe() - count / mean / std / min / quartiles / max for each column
print("\ndescribe():", df.describe(), sep="\n")

# 3. drop() - remove column 'B'; the result is a new frame and df keeps all columns
df_dropped = df.drop(columns='B')
print("\ndrop():", df_dropped, sep="\n")

# 4. sort_values() - order rows by column 'A', largest first
df_sorted = df.sort_values('A', ascending=False)
print("\nsort_values():", df_sorted, sep="\n")

# 5. merge() - join on 'key' with the default inner join; both frames have a
#    'value' column, so the result carries value_x (left) and value_y (right)
df1 = pd.DataFrame({'key': ['A', 'B', 'C'], 'value': [1, 2, 3]})
df2 = pd.DataFrame({'key': ['B', 'C', 'D'], 'value': [4, 5, 6]})
df_merged = df1.merge(df2, on='key')
print("\nmerge():", df_merged, sep="\n")


head():
   A  B   C
0  1  5   9
1  2  6  10

describe():
              A         B          C
count  4.000000  4.000000   4.000000
mean   2.500000  6.500000  10.500000
std    1.290994  1.290994   1.290994
min    1.000000  5.000000   9.000000
25%    1.750000  5.750000   9.750000
50%    2.500000  6.500000  10.500000
75%    3.250000  7.250000  11.250000
max    4.000000  8.000000  12.000000

drop():
   A   C
0  1   9
1  2  10
2  3  11
3  4  12

sort_values():
   A  B   C
3  4  8  12
2  3  7  11
1  2  6  10
0  1  5   9

merge():
  key  value_x  value_y
0   B        2        4
1   C        3        5


In [2]:
#Q2. Re-index the DataFrame with a new index that starts from 1 and increments by 2 for each row.
def reindex_dataframe(df):
    """Return a copy of ``df`` whose index is 1, 3, 5, ... (start 1, step 2).

    The input frame is left untouched: the new index is assigned on a copy,
    so re-running the calling cell always starts from the same ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to re-index. Row order and column contents are preserved.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same data and an index of odd integers, one per
        row. An empty input yields an empty frame with an empty index.
    """
    reindexed = df.copy()
    # stop is exclusive, so 2*len(df)+1 yields exactly len(df) odd labels.
    reindexed.index = pd.RangeIndex(start=1, stop=2 * len(df) + 1, step=2)
    return reindexed

# Example: rebuild the frame from the shared `data` dict, then re-index it
df = pd.DataFrame(data)
df_reindexed = reindex_dataframe(df)
print("\nRe-indexed DataFrame:", df_reindexed, sep="\n")



Re-indexed DataFrame:
   A  B   C
1  1  5   9
3  2  6  10
5  3  7  11
7  4  8  12


In [3]:
#Q3. Calculate the sum of the first three values in the 'Values' column.
def sum_first_three_values(df):
    """Print the sum of the first three entries of the 'Values' column.

    Frames with fewer than three rows are summed over the rows they have.
    Nothing is returned; the result is only written to stdout.
    """
    leading_values = df['Values'].iloc[:3]
    values_sum = leading_values.sum()
    print(f"Sum of the first three values: {values_sum}")

# Example: five values, of which the first three (10 + 20 + 30) are summed
df_values = pd.DataFrame([10, 20, 30, 40, 50], columns=['Values'])
sum_first_three_values(df_values)


Sum of the first three values: 60


In [4]:
#Q4. Create a new column 'Word_Count' that contains the number of words in each row of the 'Text' column.
def add_word_count_column(df):
    """Add a 'Word_Count' column holding the number of words in each 'Text' row.

    Words are whitespace-separated tokens. Missing entries (None / NaN) count
    as zero words rather than being stringified and counted as one word.
    Other non-string values are converted with ``str()`` before splitting.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'Text' column. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, now carrying the 'Word_Count' column.
    """
    text = df['Text']
    counts = text.apply(lambda value: len(str(value).split()))
    # str(None) / str(nan) would each split into one "word"; force those rows to 0.
    df['Word_Count'] = counts.mask(text.isna(), 0)
    return df

# Example: three short sentences with 2, 3 and 4 words respectively
df_text = pd.DataFrame({'Text': ['Hello world', 'Pandas is great', 'This is an example']})
df_text = add_word_count_column(df_text)
print("\nDataFrame with Word_Count:", df_text, sep="\n")



DataFrame with Word_Count:
                 Text  Word_Count
0         Hello world           2
1     Pandas is great           3
2  This is an example           4


In [5]:
# Q5. Difference between DataFrame.size and DataFrame.shape (both are attributes, not methods, so they are accessed without parentheses).
# DataFrame.size: Returns the number of elements in the DataFrame (rows * columns).
# DataFrame.shape: Returns a tuple representing the dimensionality of the DataFrame (rows, columns).

In [6]:
# Q6. Function to read an Excel file in pandas.
# The function used to read an Excel file is pd.read_excel()
# Example:
# df = pd.read_excel('file_path.xlsx')


In [7]:
# Q7. Create a new column 'Username' in the DataFrame that contains only the username part of each email address.
def extract_username(df):
    """Add a 'Username' column holding the part of each 'Email' before the '@'.

    Uses the vectorized ``.str`` accessor, so missing emails (None / NaN)
    produce a missing username instead of raising ``AttributeError``. An
    address with no '@' is returned unchanged as the username.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an 'Email' column of strings. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, now carrying the 'Username' column.
    """
    # n=1: only the first '@' matters, everything before it is the username.
    df['Username'] = df['Email'].str.split('@', n=1).str[0]
    return df

# Example: two addresses whose usernames are 'john.doe' and 'jane.smith'
df_email = pd.DataFrame({'Email': ['john.doe@example.com', 'jane.smith@domain.com']})
df_email = extract_username(df_email)
print("\nDataFrame with Username:", df_email, sep="\n")



DataFrame with Username:
                   Email    Username
0   john.doe@example.com    john.doe
1  jane.smith@domain.com  jane.smith


In [9]:
#Q8. Select rows where the value in column 'A' is greater than 5 and the value in column 'B' is less than 10.
def select_rows(df):
    """Return the rows of ``df`` where column 'A' > 5 and column 'B' < 10.

    Both comparisons are strict. The original index labels of the surviving
    rows are kept.
    """
    a_above_five = df['A'].gt(5)
    b_below_ten = df['B'].lt(10)
    return df.loc[a_above_five & b_below_ten]

# Example: rows 1, 2 and 4 satisfy A > 5 and B < 10
df_selection = pd.DataFrame({'A': [3, 8, 6, 2, 9], 'B': [5, 2, 9, 3, 1], 'C': [1, 7, 4, 5, 2]})
selected_df = select_rows(df_selection)
print("\nSelected DataFrame:", selected_df, sep="\n")



Selected DataFrame:
   A  B  C
1  8  2  7
2  6  9  4
4  9  1  2


In [10]:
# Q9. Calculate the mean, median, and standard deviation of the values in the 'Values' column.
def calculate_statistics(df):
    """Print the mean, median and standard deviation of the 'Values' column.

    The standard deviation comes from ``Series.std()`` with its default
    arguments. Nothing is returned; the summary is written to stdout.
    """
    values = df['Values']
    mean, median, std_dev = values.mean(), values.median(), values.std()
    print(f"Mean: {mean}, Median: {median}, Standard Deviation: {std_dev}")

# Example: five evenly spaced values, so mean and median are both 30
df_stats = pd.DataFrame([10, 20, 30, 40, 50], columns=['Values'])
calculate_statistics(df_stats)


Mean: 30.0, Median: 30.0, Standard Deviation: 15.811388300841896


In [13]:


# Q10. Create a new column 'MovingAverage' that contains the moving average of the sales for the past 7 days.
def add_moving_average(df, window=7):
    """Add a 'MovingAverage' column with the trailing mean of 'Sales'.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a numeric 'Sales' column. It is modified in place.
    window : int, default 7
        Number of consecutive rows (the current row plus the preceding
        ``window - 1`` rows) averaged for each entry.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, now carrying the 'MovingAverage' column. The
        first ``window - 1`` rows are NaN because a full window is not yet
        available.

    Notes
    -----
    The window counts rows, not calendar days. Reading it as "the past 7 days"
    assumes one row per day in chronological order -- TODO confirm for real data.
    """
    df['MovingAverage'] = df['Sales'].rolling(window=window).mean()
    return df

# Example: ten consecutive daily sales figures starting 2023-01-01
df_sales = pd.DataFrame({
    'Date': pd.date_range(start='1/1/2023', periods=10),
    'Sales': [200, 210, 215, 220, 225, 230, 235, 240, 245, 250],
})
df_sales = add_moving_average(df_sales)
print("\nDataFrame with MovingAverage:", df_sales, sep="\n")



DataFrame with MovingAverage:
        Date  Sales  MovingAverage
0 2023-01-01    200            NaN
1 2023-01-02    210            NaN
2 2023-01-03    215            NaN
3 2023-01-04    220            NaN
4 2023-01-05    225            NaN
5 2023-01-06    230            NaN
6 2023-01-07    235     219.285714
7 2023-01-08    240     225.000000
8 2023-01-09    245     230.000000
9 2023-01-10    250     235.000000


In [14]:
# Q11. Create a new column 'Weekday' that contains the weekday name corresponding to each date.
def add_weekday_column(df):
    """Add a 'Weekday' column with the day name of each entry in 'Date'.

    'Date' is passed through ``pd.to_datetime`` first, so it may hold
    datetimes or date-like strings. The frame is modified in place and the
    same object is returned.
    """
    parsed_dates = pd.to_datetime(df['Date'])
    weekday_names = parsed_dates.dt.day_name()
    df['Weekday'] = weekday_names
    return df

# Example: five consecutive dates starting on Sunday 2023-01-01
df_date = pd.DataFrame({'Date': pd.date_range(start='1/1/2023', periods=5)})
df_date = add_weekday_column(df_date)
print("\nDataFrame with Weekday:", df_date, sep="\n")



DataFrame with Weekday:
        Date    Weekday
0 2023-01-01     Sunday
1 2023-01-02     Monday
2 2023-01-03    Tuesday
3 2023-01-04  Wednesday
4 2023-01-05   Thursday
