In [1]:
import pandas as pd

# Build the sample data frame: each keyword becomes a column, each list its values
df = pd.DataFrame(
    dict(
        Name=["John", "Mary", "David", "Sarah"],
        Age=[28, 25, 32, 27],
        City=["New York", "Chicago", "Los Angeles", "Boston"],
    )
)


In [3]:
# Viewing the first rows of the data frame (head() shows at most 5 rows)
print(df.head())
print("--------------------------------------------------------------------------\n")
# Viewing the last rows of the data frame (tail() shows at most 5 rows)
print(df.tail())
print("--------------------------------------------------------------------------\n")
# Getting an overview of the data frame.
# info() writes its report to stdout itself and returns None, so it is called
# directly; wrapping it in print() would append a stray "None" line.
df.info()
print("--------------------------------------------------------------------------\n")

    Name  Age         City
0   John   28     New York
1   Mary   25      Chicago
2  David   32  Los Angeles
3  Sarah   27       Boston
--------------------------------------------------------------------------

    Name  Age         City
0   John   28     New York
1   Mary   25      Chicago
2  David   32  Los Angeles
3  Sarah   27       Boston
--------------------------------------------------------------------------

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    4 non-null      object
 1   Age     4 non-null      int64 
 2   City    4 non-null      object
dtypes: int64(1), object(2)
memory usage: 224.0+ bytes
None
--------------------------------------------------------------------------



In [4]:
# Bracket indexing with a single label returns that column as a Series
print(df['Name'], "--------------------------------------------------------------------------\n", sep="\n")
# Bracket indexing with a list of labels returns a data frame with just those columns
print(df[['Name', 'City']], "--------------------------------------------------------------------------\n", sep="\n")
# Label-based selection with loc: every row (:), column 'Name'
print(df.loc[:, 'Name'], "--------------------------------------------------------------------------\n", sep="\n")
# Position-based selection with iloc: every row (:), columns at positions 0 and 2
print(df.iloc[:, [0, 2]], "--------------------------------------------------------------------------\n", sep="\n")

0     John
1     Mary
2    David
3    Sarah
Name: Name, dtype: object
--------------------------------------------------------------------------

    Name         City
0   John     New York
1   Mary      Chicago
2  David  Los Angeles
3  Sarah       Boston
--------------------------------------------------------------------------

0     John
1     Mary
2    David
3    Sarah
Name: Name, dtype: object
--------------------------------------------------------------------------

    Name         City
0   John     New York
1   Mary      Chicago
2  David  Los Angeles
3  Sarah       Boston
--------------------------------------------------------------------------



In [5]:
# Boolean-mask filter: keep only the rows whose 'Age' is greater than 26
print(df[df['Age'].gt(26)], "--------------------------------------------------------------------------\n", sep="\n")
# The same filter written as a query expression
print(df.query("Age > 26"))


    Name  Age         City
0   John   28     New York
2  David   32  Los Angeles
3  Sarah   27       Boston
--------------------------------------------------------------------------

    Name  Age         City
0   John   28     New York
2  David   32  Los Angeles
3  Sarah   27       Boston


In [6]:
# Youngest first (sort_values sorts ascending by default)
print(df.sort_values(by='Age'), "--------------------------------------------------------------------------\n", sep="\n")
# Oldest first
print(df.sort_values(by='Age', ascending=False), "--------------------------------------------------------------------------\n", sep="\n")

    Name  Age         City
1   Mary   25      Chicago
3  Sarah   27       Boston
0   John   28     New York
2  David   32  Los Angeles
--------------------------------------------------------------------------

    Name  Age         City
2  David   32  Los Angeles
0   John   28     New York
3  Sarah   27       Boston
1   Mary   25      Chicago
--------------------------------------------------------------------------



In [7]:
# Mean age per city
print(df.groupby(by='City')['Age'].mean(), "--------------------------------------------------------------------------\n", sep="\n")
# Head count per city (count() tallies the non-null 'Name' values in each group)
print(df.groupby(by='City')['Name'].count())


City
Boston         27.0
Chicago        25.0
Los Angeles    32.0
New York       28.0
Name: Age, dtype: float64
--------------------------------------------------------------------------

City
Boston         1
Chicago        1
Los Angeles    1
New York       1
Name: Name, dtype: int64


In [11]:
# Two frames that share the 'Name' key: one holds ages, the other cities
df1 = pd.DataFrame(
    dict(
        Name=["John", "Mary", "David", "Sarah"],
        Age=[28, 25, 32, 27],
    )
)

df2 = pd.DataFrame(
    dict(
        Name=["John", "Mary", "David", "Sarah"],
        City=["New York", "Chicago", "Los Angeles", "Boston"],
    )
)

# merge: column-to-column match on the shared 'Name' key
merged_df = df1.merge(df2, on='Name')
print(merged_df, "--------------------------------------------------------------------------\n", sep="\n")
# join: matches df1's 'Name' column against the *index* of the other frame,
# so df2 is re-indexed by 'Name' first
joined_df = df1.join(other=df2.set_index('Name'), on='Name')
print(joined_df, "--------------------------------------------------------------------------\n", sep="\n")

    Name  Age         City
0   John   28     New York
1   Mary   25      Chicago
2  David   32  Los Angeles
3  Sarah   27       Boston
--------------------------------------------------------------------------

    Name  Age         City
0   John   28     New York
1   Mary   25      Chicago
2  David   32  Los Angeles
3  Sarah   27       Boston
--------------------------------------------------------------------------



In [None]:
# NOTE: the sample frame built in the first cell has no missing values, so each
# call below returns the data unchanged; the calls only illustrate the API.

# Dropping rows with missing values
print(df.dropna())

# Filling missing values with a specific value
print(df.fillna(value=0))

# Interpolating missing values.
# Interpolation is only meaningful for numeric data, and calling it on
# object-dtype (text) columns is deprecated in recent pandas, so it is applied
# to the numeric columns only; the text columns are carried over as they are.
numeric_cols = df.select_dtypes(include='number').columns
df_interpolated = df.copy()
df_interpolated[numeric_cols] = df_interpolated[numeric_cols].interpolate()
print(df_interpolated)


In [17]:
# Reshaping using pivot function: one row per 'Name', one column per 'City',
# with 'Age' as the cell value. Each person has exactly one city, so every
# other cell in their row is NaN.
df_pivot = df.pivot(index='Name', columns='City', values='Age')
print(df_pivot)
print("--------------------------------------------------------------------------\n")
# Reshaping using melt function: wide -> long. 'Name' identifies the row, and
# each of the listed value columns becomes a (variable, value) pair.
df_melt = pd.melt(df, id_vars=['Name'], value_vars=['Age', 'City'])
print(df_melt)
print("--------------------------------------------------------------------------\n")
# Reshaping using stack function.
# stack() folds *every* non-index column into rows, so the columns are pinned
# explicitly. Otherwise the result would change once other cells add columns
# to df (e.g. 'Age in 10 years' / 'Greeting'), depending on execution order.
df_stacked = df[['Name', 'City', 'Age']].set_index(['Name', 'City']).stack().reset_index()
print(df_stacked)
print("--------------------------------------------------------------------------\n")

City   Boston  Chicago  Los Angeles  New York
Name                                         
David     NaN      NaN         32.0       NaN
John      NaN      NaN          NaN      28.0
Mary      NaN     25.0          NaN       NaN
Sarah    27.0      NaN          NaN       NaN
--------------------------------------------------------------------------

    Name variable        value
0   John      Age           28
1   Mary      Age           25
2  David      Age           32
3  Sarah      Age           27
4   John     City     New York
5   Mary     City      Chicago
6  David     City  Los Angeles
7  Sarah     City       Boston
--------------------------------------------------------------------------

    Name         City level_2   0
0   John     New York     Age  28
1   Mary      Chicago     Age  25
2  David  Los Angeles     Age  32
3  Sarah       Boston     Age  27
--------------------------------------------------------------------------



In [23]:
# Element-wise apply: derive a new column by adding 10 to every value of 'Age'
df['Age in 10 years'] = df['Age'].apply(lambda age: age + 10)
print(df['Age in 10 years'], "--------------------------------------------------------------------------\n", sep="\n")
# Column-wise apply: the function receives each selected column as a Series
# and reduces it to its range (max - min)
dt = df[['Age', 'Age in 10 years']].apply(lambda col: col.max() - col.min())
print(dt, "--------------------------------------------------------------------------\n", sep="\n")
# Applying a user-defined function to a column
def add_hello(name):
    """Return ``name`` prefixed with the greeting ``'Hello '``."""
    greeting = 'Hello ' + name
    return greeting

# Run add_hello over every value of 'Name' and store the result as a new column
df['Greeting'] = df['Name'].apply(func=add_hello)
print(df['Greeting'])

0    38
1    35
2    42
3    37
Name: Age in 10 years, dtype: int64
--------------------------------------------------------------------------

Age                7
Age in 10 years    7
dtype: int64
--------------------------------------------------------------------------

0     Hello John
1     Hello Mary
2    Hello David
3    Hello Sarah
Name: Greeting, dtype: object
