# DataFrame operations

In [1]:
#1. value_counts()
This function returns a Series containing counts of unique values in a column.
import pandas as pd

# Demo frame: three people, each appearing twice with identical age and city
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'Alice', 'Bob', 'Charlie'],
    Age=[25, 30, 35, 25, 30, 35],
    City=['New York', 'San Francisco', 'Los Angeles', 'New York', 'San Francisco', 'Los Angeles'],
)
df = pd.DataFrame(data)

# Tally how many rows carry each distinct value of 'Name'
name_counts = df["Name"].value_counts()

print(name_counts)

Name
Alice      2
Bob        2
Charlie    2
Name: count, dtype: int64


In [2]:
#2. apply()
This function applies a function along an axis of a DataFrame or, as in the example below, to each value of a Series.

# Run a callable over every entry of 'Age' and store the result as a new column
df["Age_plus_10"] = df["Age"].apply(lambda age: age + 10)

print(df)

      Name  Age           City  Age_plus_10
0    Alice   25       New York           35
1      Bob   30  San Francisco           40
2  Charlie   35    Los Angeles           45
3    Alice   25       New York           35
4      Bob   30  San Francisco           40
5  Charlie   35    Los Angeles           45


In [3]:
#3. unique()
This function returns the unique values in a column.
# Collect the distinct entries of 'City', in order of first appearance
unique_cities = df["City"].unique()

print(unique_cities)

['New York' 'San Francisco' 'Los Angeles']


In [4]:
#4. nunique()
This function returns the number of unique values in a column.
# How many distinct entries does 'City' contain?
unique_city_count = df["City"].nunique()

print(unique_city_count)

3


In [5]:
#5. describe()
This function generates descriptive statistics of the DataFrame; by default only the numeric columns are summarized.
# Summary statistics (count, mean, std, min, quartiles, max) per column
description = df.describe()
print(description)

             Age  Age_plus_10
count   6.000000     6.000000
mean   30.000000    40.000000
std     4.472136     4.472136
min    25.000000    35.000000
25%    26.250000    36.250000
50%    30.000000    40.000000
75%    33.750000    43.750000
max    35.000000    45.000000


In [6]:
#6. sort_values()
This function sorts the DataFrame by the values of one or more columns.
# Order the rows by ascending 'Age'; the original frame is left as is
sorted_df = df.sort_values("Age")
print(sorted_df)

      Name  Age           City  Age_plus_10
0    Alice   25       New York           35
3    Alice   25       New York           35
1      Bob   30  San Francisco           40
4      Bob   30  San Francisco           40
2  Charlie   35    Los Angeles           45
5  Charlie   35    Los Angeles           45


In [7]:
#7. groupby()
This function groups the DataFrame using a mapper or by a Series of columns.
# Average 'Age' within each 'Name' group
grouped_df = df.groupby(by="Name")["Age"].mean()
print(grouped_df)

Name
Alice      25.0
Bob        30.0
Charlie    35.0
Name: Age, dtype: float64


In [8]:
#8. isnull() and notnull()
These functions detect missing values.
# Small frame with one gap in 'Age' and one gap in 'City'
data_with_nan = dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Age=[25, None, 35],
    City=['New York', 'San Francisco', None],
)
df_nan = pd.DataFrame(data_with_nan)

# Boolean masks: True where a value is missing / True where it is present
null_values = df_nan.isnull()
not_null_values = df_nan.notnull()

print(null_values)
print(not_null_values)

    Name    Age   City
0  False  False  False
1  False   True  False
2  False  False   True
   Name    Age   City
0  True   True   True
1  True  False   True
2  True   True  False


In [9]:
#9. fillna()
This function fills NA/NaN values with a specified value (here, a separate fill value for each column).
# Per-column replacements: the column mean for 'Age', a placeholder for 'City'
filled_df = df_nan.fillna(value={'Age': df_nan['Age'].mean(), 'City': 'Unknown'})

print(filled_df)

      Name   Age           City
0    Alice  25.0       New York
1      Bob  30.0  San Francisco
2  Charlie  35.0        Unknown


In [10]:
#10. dropna()
This function removes the rows (or, optionally, the columns) that contain missing values.
# Keep only the rows in which every column has a value
cleaned_df = df_nan.dropna(axis=0, how='any')

print(cleaned_df)

    Name   Age      City
0  Alice  25.0  New York


In [11]:
#11. astype()
This function casts a pandas object to a specified dtype.
# Changing the dtype of 'Age' column to string
# Cast on a copy: assign() returns a new frame, so `df` itself keeps a numeric
# 'Age' column and any later numeric aggregation on `df` still works.
df_age_str = df.assign(Age=df['Age'].astype(str))
print(df_age_str.dtypes)

Name           object
Age            object
City           object
Age_plus_10     int64
dtype: object


In [None]:
#12. pivot_table()
This function creates a spreadsheet-style pivot table as a DataFrame.
# Creating a pivot table
# aggfunc='mean' requires numeric values, so coerce 'Age' on a temporary frame;
# this keeps the pivot working even if 'Age' has been cast to strings beforehand.
pivot = (
    df.assign(Age=pd.to_numeric(df['Age']))
    .pivot_table(values='Age', index='Name', columns='City', aggfunc='mean')
)
print(pivot)

In [15]:
#13. merge()
This function merges DataFrames similar to SQL joins.
# Two small frames that share the key column 'Name'
df1 = pd.DataFrame(dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Age=[25, 30, 35],
))

df2 = pd.DataFrame(dict(
    Name=['Alice', 'Bob', 'David'],
    City=['New York', 'San Francisco', 'Chicago'],
))

# Inner join on 'Name': only names present in both frames survive
merged_df = df1.merge(df2, on='Name', how='inner')

print(merged_df)

    Name  Age           City
0  Alice   25       New York
1    Bob   30  San Francisco
