#### Jupyter Magic Commands:
Magic commands are special commands provided by the IPython kernel that extend what a notebook cell can do.
They are not part of the pandas library itself, but they are often used while working with pandas.
#### Line Magic:
1) %matplotlib inline - Displays plots inside the notebook
2) %load_ext - Loads an IPython extension
#### Cell Magic:
1) %%timeit - Times the execution of the whole cell
2) %%html - Renders the cell contents as HTML
3) %%bash - Runs the cell contents as Bash shell commands

In [1]:
# Some pandas methods are so convenient that they feel like "magic" functions.
# The pandas query() method filters a DataFrame using a string expression, similar to a SQL WHERE clause.
import pandas as pd

# Sample student records used by the query() examples in the following cells.
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    Age=[20, 22, 21, 23, 20],
    Major=['History', 'Math', 'Physics', 'Math', 'History'],
    GPA=[3.5, 3.8, 3.2, 3.9, 3.7],
)
df = pd.DataFrame(data)

# A single print call with a newline separator emits the heading and the
# frame on separate lines.
print("Original DataFrame:", df, sep="\n")

Original DataFrame:
      Name  Age    Major  GPA
0    Alice   20  History  3.5
1      Bob   22     Math  3.8
2  Charlie   21  Physics  3.2
3    David   23     Math  3.9
4      Eve   20  History  3.7


In [2]:
# query() evaluates the string against the column names, so 'Age > 21'
# keeps only the rows whose Age value exceeds 21.
result_single_condition = df.query('Age > 21')
print("\nStudents older than 21:", result_single_condition, sep="\n")


Students older than 21:
    Name  Age Major  GPA
1    Bob   22  Math  3.8
3  David   23  Math  3.9


In [3]:
# GPA threshold a student must exceed. Inside the query string the @ prefix
# refers to this Python variable rather than to a column.
min_gpa = 3.6
result_with_variable = df.query('GPA > @min_gpa and Major == "History"')
# Build the heading from min_gpa so it cannot drift from the filter actually
# applied if the threshold above is edited.
print(f"\nHistory majors with GPA > {min_gpa}:")
print(result_with_variable)


History majors with GPA > 3.6:
  Name  Age    Major  GPA
4  Eve   20  History  3.7


In [4]:
# pipe()
# The pandas.DataFrame.pipe() method lets you chain custom functions together in a single expression.
import pandas as pd

# Sample employee records for the pipe() example.
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David'],
    Age=[25, 30, 22, 35],
    City=['New York', 'London', 'Paris', 'New York'],
    Salary=[50000, 60000, 45000, 70000],
)
df = pd.DataFrame(data)

# Define custom functions for data processing
def capitalize_city(df_input):
    """Return a copy of ``df_input`` with ``str.capitalize`` applied to 'City'.

    Only the first character of each value is upper-cased and the rest is
    lower-cased, so a multi-word name such as 'New York' becomes 'New york'.
    The input frame is left unmodified.
    """
    capitalized = df_input['City'].str.capitalize()
    # assign() returns a new frame, replacing the existing 'City' column.
    return df_input.assign(City=capitalized)

def add_bonus(df_input, bonus_percentage):
    """Return a copy of ``df_input`` with 'Salary' scaled by ``1 + bonus_percentage``.

    ``bonus_percentage`` is applied as a fraction: 0.10 raises every salary
    by 10%. The input frame is left unmodified.
    """
    multiplier = 1 + bonus_percentage
    # assign() returns a new frame, replacing the existing 'Salary' column.
    return df_input.assign(Salary=df_input['Salary'] * multiplier)

def filter_by_age(df_input, min_age):
    """Return an independent copy of the rows whose 'Age' is at least ``min_age``."""
    is_old_enough = df_input['Age'] >= min_age
    # .copy() detaches the result from df_input so later edits do not touch it.
    return df_input.loc[is_old_enough].copy()

# Each .pipe(func, **kwargs) call evaluates func(frame, **kwargs) and hands the
# returned frame to the next step, so the chain reads in execution order.
processed_df = (
    df
    .pipe(capitalize_city)
    .pipe(add_bonus, bonus_percentage=0.10)  # extra keyword arguments are forwarded to add_bonus
    .pipe(filter_by_age, min_age=25)
)

print(processed_df)

    Name  Age      City   Salary
0  Alice   25  New york  55000.0
1    Bob   30    London  66000.0
3  David   35  New york  77000.0


In [5]:
"""The pandas.DataFrame.assign() method creates new columns or modifies existing ones in a DataFrame,
 returning a new DataFrame with the changes"""
import pandas as pd

# Sample product table: one row per product.
df = pd.DataFrame(
    [
        (101, 25.99, 10),
        (102, 45.50, 5),
        (103, 12.75, 20),
        (104, 67.25, 8),
    ],
    columns=['product_id', 'price', 'quantity'],
)

print("Original DataFrame:", df, sep="\n")

# .assign() returns a new frame; df itself keeps only its three columns.
# Scalar values ('USD', True) are repeated on every row, while a callable
# receives the frame and returns the values for the new column.
df_updated = df.assign(
    currency='USD',
    in_stock=True,
    total_value=lambda frame: frame['price'] * frame['quantity'],
)

print("\nUpdated DataFrame using .assign():", df_updated, sep="\n")

# Within one .assign() call a later callable can read a column created by an
# earlier keyword: discounted_value is computed from the new total_value.
df_further_updated = df.assign(
    total_value=lambda frame: frame['price'] * frame['quantity'],
    discounted_value=lambda frame: frame['total_value'] * 0.9,  # 10% discount
)

print(
    "\nDataFrame with dependent columns created in one .assign() call:",
    df_further_updated,
    sep="\n",
)

Original DataFrame:
   product_id  price  quantity
0         101  25.99        10
1         102  45.50         5
2         103  12.75        20
3         104  67.25         8

Updated DataFrame using .assign():
   product_id  price  quantity currency  in_stock  total_value
0         101  25.99        10      USD      True        259.9
1         102  45.50         5      USD      True        227.5
2         103  12.75        20      USD      True        255.0
3         104  67.25         8      USD      True        538.0

DataFrame with dependent columns created in one .assign() call:
   product_id  price  quantity  total_value  discounted_value
0         101  25.99        10        259.9            233.91
1         102  45.50         5        227.5            204.75
2         103  12.75        20        255.0            229.50
3         104  67.25         8        538.0            484.20


In [6]:
"""explode() method transforms each element of a list-like entry within a specified column into a separate row"""
import pandas as pd

# Every 'Items' entry is a Python list, giving explode() list-like values to
# work on in the next cell.
data = dict(
    ID=[1, 2, 3],
    Items=[['Apple', 'Banana'], ['Orange'], ['Grape', 'Mango', 'Kiwi']],
)
df = pd.DataFrame(data)
print("Original DataFrame:", df, sep="\n")

Original DataFrame:
   ID                 Items
0   1       [Apple, Banana]
1   2              [Orange]
2   3  [Grape, Mango, Kiwi]


In [7]:
# Each list element gets its own row; the ID value and the original index
# label are repeated for every element that came from the same row.
df_exploded = df.explode('Items')
print("\nDataFrame after explode():", df_exploded, sep="\n")


DataFrame after explode():
   ID   Items
0   1   Apple
0   1  Banana
1   2  Orange
2   3   Grape
2   3   Mango
2   3    Kiwi


In [8]:
"""The pandas.DataFrame.select_dtypes() method is used to select columns from a DataFrame based on their data types.
It allows for including or excluding specific data types."""
import pandas as pd
import numpy as np

# Sample frame with one column for each of four basic kinds of value.
data = dict(
    col_int=[1, 2, 3],
    col_float=[1.1, 2.2, 3.3],
    col_str=['a', 'b', 'c'],
    col_bool=[True, False, True],
)
df = pd.DataFrame(data)

print("Original DataFrame:", df, sep="\n")
print("\nData types of columns:", df.dtypes, sep="\n")

# 1. Numeric columns: np.number picks up the integer and float columns.
numeric_cols = df.select_dtypes(include=np.number)
print("\nColumns with numeric data types:", numeric_cols, sep="\n")

# 2. String columns, which this frame stores with the object dtype.
# NOTE(review): newer pandas versions may infer a dedicated string dtype
# instead of object -- confirm against the installed version.
string_cols = df.select_dtypes(include='object')
print("\nColumns with string data type:", string_cols, sep="\n")

# 3. Everything except the float64 columns.
non_float_cols = df.select_dtypes(exclude='float64')
print("\nColumns excluding float data type:", non_float_cols, sep="\n")

# 4. Several dtypes at once: pass them as a list.
int_and_bool_cols = df.select_dtypes(include=['int64', 'bool'])
print("\nColumns with integer or boolean data types:", int_and_bool_cols, sep="\n")

Original DataFrame:
   col_int  col_float col_str  col_bool
0        1        1.1       a      True
1        2        2.2       b     False
2        3        3.3       c      True

Data types of columns:
col_int        int64
col_float    float64
col_str       object
col_bool        bool
dtype: object

Columns with numeric data types:
   col_int  col_float
0        1        1.1
1        2        2.2
2        3        3.3

Columns with string data type:
  col_str
0       a
1       b
2       c

Columns excluding float data type:
   col_int col_str  col_bool
0        1       a      True
1        2       b     False
2        3       c      True

Columns with integer or boolean data types:
   col_int  col_bool
0        1      True
1        2     False
2        3      True
