In [30]:
#1.DataFrame(): Creates a DataFrame from various data sources.
import pandas as pd

# Column-oriented input: every key becomes a column and every list position a row.
data = {
    'Name': ['Alice', 'Bob', 'Claire'],
    'Age': [25, 30, 27],
}
df = pd.DataFrame(data)


In [31]:
#2.head(): Displays the first few rows of a DataFrame.
# Relies on the `df` built in the previous cell. Called without an argument,
# head() returns at most the first 5 rows; that frame has only 3 rows, so the
# whole frame is printed.
print(df.head())


     Name  Age
0   Alice   25
1     Bob   30
2  Claire   27


In [32]:
#3.groupby(): Groups data and performs aggregate operations.
# Groups the rows by 'Age' and counts the non-null 'Name' entries in each
# group. The result is a Series indexed by the distinct Age values.
grouped = df.groupby('Age')['Name'].count()
print(grouped)


Age
25    1
27    1
30    1
Name: Name, dtype: int64


In [33]:
#4.sort_values(): Sorts DataFrame rows based on one or more columns
# ascending=False orders the rows from the highest 'Age' to the lowest.
# The sorted result is bound to a new name; `df` is not reassigned here.
sorted_df = df.sort_values(by='Age', ascending=False)
print(sorted_df)


     Name  Age
1     Bob   30
2  Claire   27
0   Alice   25


In [34]:
#5.pivot_table(): Creates a pivot table from a DataFrame.
# Produces one row per distinct 'Name' holding the mean of that name's 'Age'
# values. Every name occurs exactly once in this data, so each mean is simply
# that person's age.
pivot_table = df.pivot_table(values='Age', index='Name', aggfunc='mean')
print(pivot_table)


        Age
Name       
Alice    25
Bob      30
Claire   27


In [35]:
#2.
import pandas as pd

def reindex_with_custom_increment(df, start=1, step=2):
    """Return a new DataFrame whose index is an evenly spaced integer sequence.

    The new index is ``start, start + step, start + 2 * step, ...`` with one
    label per row of ``df``. With the default arguments this yields
    1, 3, 5, ..., which is the behaviour callers passing only ``df`` rely on.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to re-label. It is not modified.
    start : int, default 1
        First index label.
    step : int, default 2
        Difference between consecutive labels; must be non-zero.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same columns and values as ``df`` and the
        generated index.

    Raises
    ------
    ValueError
        If ``step`` is zero.
    """
    # The stop value is exclusive, so start + step * len(df) yields exactly
    # len(df) labels.
    new_index = pd.RangeIndex(start=start, stop=start + step * len(df), step=step)
    return df.set_index(new_index)

# Sample frame: three rows and three integer columns.
data = {
    'A': [10, 20, 30],
    'B': [40, 50, 60],
    'C': [70, 80, 90],
}
df = pd.DataFrame(data)

# The three rows should come back labelled 1, 3 and 5.
reindexed_df = reindex_with_custom_increment(df)
print(reindexed_df)


    A   B   C
1  10  40  70
3  20  50  80
5  30  60  90


In [36]:
#3.
import pandas as pd

def calculate_sum_of_first_three(df):
    """Print the sum of the first three entries of the 'Values' column.

    "First three" is positional (row order), independent of the index
    labels. If the column has fewer than three rows, whatever rows exist are
    summed. Missing values are skipped, following the pandas default for
    ``Series.sum``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a numeric 'Values' column.

    Returns
    -------
    None
        The result is written to standard output only.

    Raises
    ------
    KeyError
        If ``df`` has no 'Values' column.
    """
    # .iloc makes the positional selection explicit, and Series.sum() adds the
    # values in one vectorized call instead of iterating in Python.
    sum_first_three = df['Values'].iloc[:3].sum()
    print("Sum of the first three values:", sum_first_three)

# Five sample values; only the first three (10 + 20 + 30) feed the sum.
data = {
    'Values': [10, 20, 30, 40, 50],
}
df = pd.DataFrame(data)

calculate_sum_of_first_three(df)


Sum of the first three values: 60


In [37]:
#4.
import pandas as pd

def add_word_count_column(df):
    """Add a 'Word_Count' column with the number of words in each 'Text' entry.

    Words are the pieces produced by splitting on runs of whitespace, so an
    empty string counts as 0 words. Rows whose text is missing get a missing
    count instead of raising.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a string 'Text' column. It is modified in place:
        the 'Word_Count' column is added (or overwritten).

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.

    Raises
    ------
    KeyError
        If ``df`` has no 'Text' column.
    """
    # The vectorized .str accessor propagates missing values, whereas calling
    # x.split() inside apply() fails on them.
    df['Word_Count'] = df['Text'].str.split().str.len()
    return df

# Three sample sentences containing 2, 3 and 2 whitespace-separated words.
data = {
    'Text': [
        "Hello world",
        "Python is great",
        "Data analysis",
    ],
}
df = pd.DataFrame(data)

# The helper writes the new column into df and hands that same frame back.
df_with_word_count = add_word_count_column(df)
print(df_with_word_count)


              Text  Word_Count
0      Hello world           2
1  Python is great           3
2    Data analysis           2


#5.DataFrame.size: Returns the total number of elements (cells) in the DataFrame.

DataFrame.shape: Returns a tuple representing the dimensions (rows and columns) of the DataFrame.

In other words, DataFrame.size is a single integer giving the total number of cells (rows × columns), while DataFrame.shape is a tuple of the form (rows, columns). For example, a DataFrame with 3 rows and 2 columns has shape (3, 2) and size 6.

#6.
import pandas as pd

df = pd.read_excel('filename.xlsx')  # 'filename.xlsx' is a placeholder path; replace it with a real workbook before running

print(df)  # prints the frame that was read, i.e. the contents of the sheet


In [38]:
#7.
import pandas as pd

def extract_username(df):
    """Add a 'Username' column holding the part of each 'Email' before the '@'.

    Only the text before the first '@' is kept. An address without any '@'
    is copied unchanged. Rows whose email is missing get a missing username
    instead of raising.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a string 'Email' column. It is modified in place:
        the 'Username' column is added (or overwritten).

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.

    Raises
    ------
    KeyError
        If ``df`` has no 'Email' column.
    """
    # n=1 stops after the first '@' because only the leading piece is needed.
    # The .str accessor propagates missing values, whereas email.split()
    # inside apply() fails on them.
    df['Username'] = df['Email'].str.split('@', n=1).str[0]
    return df

# Three sample addresses; the expected usernames are the parts before the '@'.
data = {
    'Email': [
        'john.doe@example.com',
        'jane.smith@example.com',
        'user123@example.com',
    ],
}
df = pd.DataFrame(data)

df_with_username = extract_username(df)
print(df_with_username)


                    Email    Username
0    john.doe@example.com    john.doe
1  jane.smith@example.com  jane.smith
2     user123@example.com     user123


In [39]:
#8.
import pandas as pd

def select_rows(df):
    """Return the rows of ``df`` where column 'A' exceeds 5 and column 'B' is below 10.

    Both comparisons are strict. The original row labels are preserved in the
    result and ``df`` itself is left untouched.
    """
    a_above_five = df['A'].gt(5)
    b_below_ten = df['B'].lt(10)
    return df.loc[a_above_five & b_below_ten]
# Sample frame for the filter; rows 1, 2 and 4 are the ones with A > 5 and B < 10.
data = {
    'A': [3, 8, 6, 2, 9],
    'B': [5, 2, 9, 3, 1],
    'C': [1, 7, 4, 5, 2],
}
df = pd.DataFrame(data)

selected_df = select_rows(df)
print(selected_df)


   A  B  C
1  8  2  7
2  6  9  4
4  9  1  2


In [40]:
#9.
import pandas as pd

def calculate_statistics(df):
    """Return ``(mean, median, standard deviation)`` of the 'Values' column.

    The standard deviation comes from ``Series.std()`` with its default
    settings, i.e. the sample standard deviation.
    """
    values = df['Values']
    return values.mean(), values.median(), values.std()

# Evenly spaced sample values, so the mean and the median coincide at 30.
data = {
    'Values': [10, 20, 30, 40, 50],
}
df = pd.DataFrame(data)

# The helper returns the three statistics as a tuple; unpack them by position.
mean, median, std_dev = calculate_statistics(df)

print("Mean:", mean)
print("Median:", median)
print("Standard Deviation:", std_dev)


Mean: 30.0
Median: 30.0
Standard Deviation: 15.811388300841896


In [41]:
#10.
import pandas as pd

def calculate_moving_average(df, window=7):
    """Add a 'MovingAverage' column with the trailing rolling mean of 'Sales'.

    Each row's value is the mean of that row and up to ``window - 1`` rows
    before it. The window counts rows, not calendar days.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a numeric 'Sales' column, already in the desired
        order. It is modified in place: the 'MovingAverage' column is added
        (or overwritten).
    window : int, default 7
        Number of rows in the rolling window. The default keeps the original
        7-row behaviour.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.

    Raises
    ------
    KeyError
        If ``df`` has no 'Sales' column.
    """
    # min_periods=1 averages the leading rows over however many observations
    # exist so far, instead of leaving them as NaN until the window is full.
    df['MovingAverage'] = df['Sales'].rolling(window=window, min_periods=1).mean()
    return df

# Fourteen consecutive days starting 2023-01-01, with sales rising by 10 per day
# from 10 up to 140.
data = {
    'Date': pd.date_range(start='2023-01-01', periods=14),
    'Sales': list(range(10, 150, 10)),
}
df = pd.DataFrame(data)

df_with_moving_avg = calculate_moving_average(df)
print(df_with_moving_avg)


         Date  Sales  MovingAverage
0  2023-01-01     10           10.0
1  2023-01-02     20           15.0
2  2023-01-03     30           20.0
3  2023-01-04     40           25.0
4  2023-01-05     50           30.0
5  2023-01-06     60           35.0
6  2023-01-07     70           40.0
7  2023-01-08     80           50.0
8  2023-01-09     90           60.0
9  2023-01-10    100           70.0
10 2023-01-11    110           80.0
11 2023-01-12    120           90.0
12 2023-01-13    130          100.0
13 2023-01-14    140          110.0


In [42]:
#11.
import pandas as pd

def add_weekday_column(df):
    """Add a 'Weekday' column with the full weekday name of each 'Date' entry.

    The 'Date' column must be datetime-like because the ``.dt`` accessor is
    used. ``df`` is modified in place and the same object is returned.
    """
    # '%A' is the strftime directive for the full weekday name, e.g. 'Sunday'.
    weekday_names = df['Date'].dt.strftime('%A')
    df['Weekday'] = weekday_names
    return df

# Five consecutive dates, already parsed to datetimes so the .dt accessor works.
data = {
    'Date': pd.to_datetime([
        '2023-01-01',
        '2023-01-02',
        '2023-01-03',
        '2023-01-04',
        '2023-01-05',
    ]),
}
df = pd.DataFrame(data)

df_with_weekday = add_weekday_column(df)
print(df_with_weekday)


        Date    Weekday
0 2023-01-01     Sunday
1 2023-01-02     Monday
2 2023-01-03    Tuesday
3 2023-01-04  Wednesday
4 2023-01-05   Thursday


In [43]:
#12.
import pandas as pd

def select_rows_in_date_range(df, start_date, end_date):
    """Return the rows of ``df`` whose 'Date' lies between the two bounds.

    Both ``start_date`` and ``end_date`` are inclusive. The original row
    labels and row order are preserved, and ``df`` is left untouched.
    """
    # Series.between is inclusive on both ends by default, which matches a
    # ``>= start_date`` combined with ``<= end_date`` comparison.
    in_range = df['Date'].between(start_date, end_date)
    return df[in_range]

# Four unsorted dates; only 2023-02-05 falls outside January 2023.
data = {
    'Date': pd.to_datetime([
        '2023-01-15',
        '2023-01-25',
        '2023-02-05',
        '2023-01-10',
    ]),
}
df = pd.DataFrame(data)

# Bounds of the window to keep: the first and last day of January 2023.
start_date = pd.to_datetime('2023-01-01')
end_date = pd.to_datetime('2023-01-31')

selected_df = select_rows_in_date_range(df, start_date, end_date)
print(selected_df)



        Date
0 2023-01-15
1 2023-01-25
3 2023-01-10


In [44]:
#13.
import pandas as pd


In [45]:
print("finished")  # presumably a completion marker for the exercises above

finished


In [None]:
print("systumm")  # NOTE(review): this cell has no execution count, so it was not run in the saved notebook; looks like leftover scratch, consider deleting