In [27]:
import pandas as pd
import sqlite3
import os

#### **Merging and Joining**
1. **Inner Join on Chinook Database**
   - Load the `chinook.db` database.
   - Perform an inner join between the `customers` and `invoices` tables on the `CustomerId` column.
   - Find the total number of invoices for each customer.

2. **Outer Join on Movie Data**
   - Load the `movie.csv` file.
   - Create two smaller DataFrames:
     - One with only `director_name` and `color`.
     - Another with `director_name` and `num_critic_for_reviews`.
   - Perform a left join and then a full outer join on `director_name`.
   - Count how many rows are in the resulting DataFrames for each join type.

---


In [28]:
# Part 1 Task 1
def load_chinook():
    """Load the ``customers`` and ``invoices`` tables from the Chinook database.

    Returns:
        tuple[pandas.DataFrame, pandas.DataFrame] | None: ``(customers, invoices)``
        on success, or ``None`` (after printing the reason) when the database
        file is missing or a table cannot be read. Callers must check for
        ``None`` before unpacking the result.
    """
    path = r'../data/chinook.db'
    try:
        # sqlite3.connect() silently creates an empty database when the file
        # does not exist, so check first (as the other loaders in this
        # notebook do) instead of leaving a stray empty chinook.db behind.
        if not os.path.exists(path):
            print(f'File not found in {path}')
            return None

        connection = sqlite3.connect(path)
        try:
            customers = pd.read_sql('SELECT * from customers', con=connection)
            invoices = pd.read_sql('SELECT * from invoices', con=connection)
        finally:
            # Using the connection as a context manager only commits or rolls
            # back the transaction; it never closes the connection, so close
            # it explicitly.
            connection.close()
        return customers, invoices
    except sqlite3.OperationalError as e:
        print(f'Error occured {e}')
        return None
    except Exception as e:
        print(f'Unexpected error happen {e}')
        return None

In [43]:
def work_with_chinook(customers, invoices):
    """Inner-join customers with their invoices on ``CustomerId``.

    Args:
        customers: DataFrame with a ``CustomerId`` column (left side).
        invoices: DataFrame with a ``CustomerId`` column (right side).

    Returns:
        pandas.DataFrame: one row per matching customer/invoice pair;
        rows without a match on either side are dropped.
    """
    return pd.merge(customers, invoices, on='CustomerId', how='inner')

In [44]:
# load_chinook() returns None (after printing the reason) when loading fails,
# so check before unpacking: this fails with a clear message instead of an
# opaque "cannot unpack non-iterable NoneType object" TypeError.
chinook_tables = load_chinook()
if chinook_tables is None:
    raise RuntimeError('Could not load the customers/invoices tables from chinook.db')
customers, invoices = chinook_tables

In [51]:
# Pair every invoice with its customer record (inner join on CustomerId).
# NOTE(review): the task also asks for the total number of invoices per
# customer; that aggregation is not computed here yet -- TODO.
inner_join = work_with_chinook(customers, invoices)

In [30]:
def load_movie():
    """Read the movie dataset from ``../data/movies.csv``.

    Returns:
        pandas.DataFrame | None: the parsed CSV, or ``None`` (after printing
        a message) when the file does not exist or cannot be read.

    NOTE(review): the task text refers to ``movie.csv`` while this loads
    ``movies.csv`` -- confirm the actual file name.
    """
    path = r'../data/movies.csv'
    try:
        if os.path.exists(path):
            return pd.read_csv(path)
        print(f'File Not found at {path}')
    except Exception as e:
        print(f'Unexpected error happen {e}')
    # Reached for both the missing-file and the read-error case.
    return None

In [31]:
def work_with_movies(movie_data):
    """Placeholder for the movie join exercise; not implemented yet.

    The body is a bare ``pass``, so ``movie_data`` is ignored and the call
    returns ``None``.

    TODO: presumably this should implement the "Outer Join on Movie Data"
    task (left and full outer joins on ``director_name`` plus row counts)
    -- confirm before filling it in.
    """
    pass


#### **Grouping and Aggregating**
1. **Grouped Aggregations on Titanic**
   - Group passengers by `Pclass` and calculate the following:
     - Average age.
     - Total fare.
     - Count of passengers.
   - Save the results to a new DataFrame.

2. **Multi-level Grouping on Movie Data**
   - Group the movies by `color` and `director_name`.
   - Find:
     - Total `num_critic_for_reviews` for each group.
     - Average `duration` for each group.

3. **Nested Grouping on Flights**
   - Group flights by `Year` and `Month` and calculate:
     - Total number of flights.
     - Average arrival delay (`ArrDelay`).
     - Maximum departure delay (`DepDelay`).

---


In [32]:
def load_titanic():
    """Read the first sheet of ``../data/titanic.xlsx``.

    Returns:
        pandas.DataFrame | None: the sheet contents, or ``None`` (after
        printing the error) when reading fails for any reason, including a
        missing file.
    """
    path = r'../data/titanic.xlsx'
    try:
        return pd.read_excel(path, sheet_name=0)
    except Exception as e:
        print(f'Unexpected error happen {e}')
    return None

In [33]:
def grouping_titanic():
    """Placeholder; not implemented yet (does nothing and returns ``None``).

    TODO: presumably meant for the "Grouped Aggregations on Titanic" task
    (average age, total fare and passenger count per ``Pclass``) -- confirm.
    """
    pass

In [34]:
def grouping_movies():
    """Placeholder; not implemented yet (does nothing and returns ``None``).

    TODO: presumably meant for the "Multi-level Grouping on Movie Data" task
    (group by ``color`` and ``director_name``) -- confirm.
    """
    pass

In [35]:
def load_flights():
    """Read the flights parquet data located at ``../data/flights``.

    Returns:
        pandas.DataFrame | None: the loaded data, or ``None`` (after printing
        a message) when the path does not exist or reading fails.
    """
    try:
        path = r'../data/flights'
        if os.path.exists(path):
            flights_data = pd.read_parquet(path)
        else:
            print(f'File not found in {path}')
            flights_data = None
    except Exception as e:
        print(f'Error occured {e}')
        flights_data = None
    return flights_data

In [36]:
def grouping_flights():
    """Placeholder; not implemented yet (does nothing and returns ``None``).

    TODO: presumably meant for the "Nested Grouping on Flights" task
    (flight count, mean ``ArrDelay`` and max ``DepDelay`` per ``Year`` and
    ``Month``) -- confirm.
    """
    pass

---

#### **Applying Functions**
1. **Apply a Custom Function on Titanic**
   - Write a function to classify passengers as `Child` (age < 18) or `Adult`.
   - Use `apply` to create a new column, `Age_Group`, with these values.

2. **Normalize Employee Salaries**
   - Load the `employee.csv` file.
   - Normalize the salaries within each department.

3. **Custom Function on Movies**
   - Write a function that returns `Short`, `Medium`, or `Long` based on the duration of a movie:
     - `Short`: Less than 60 minutes.
     - `Medium`: Between 60 and 120 minutes (inclusive).
     - `Long`: More than 120 minutes.
   - Apply this function to classify movies in the `movie.csv` dataset.

---


In [37]:
def cutom_function_on_titanic():
    """Placeholder; not implemented yet (does nothing and returns ``None``).

    TODO: presumably meant for the "Apply a Custom Function on Titanic" task
    (an ``Age_Group`` column with ``Child``/``Adult``) -- confirm.

    NOTE(review): the name looks like a misspelling of "custom"; it is left
    unchanged here because renaming would break any existing callers.
    """
    pass

In [38]:
def load_employee():
    """Read the employee dataset from ``../data/employee.csv``.

    Returns:
        pandas.DataFrame | None: the parsed CSV, or ``None`` (after printing
        a message) when the file does not exist or cannot be read.
    """
    path = r'../data/employee.csv'
    employee_data = None
    try:
        if not os.path.exists(path):
            print(f'File not found in {path}')
        else:
            employee_data = pd.read_csv(path)
    except Exception as e:
        print(f'Error occured {e}')
    return employee_data

In [39]:
def reshape_employee():
    """Placeholder; not implemented yet (does nothing and returns ``None``).

    TODO: presumably meant for the "Normalize Employee Salaries" task
    (normalizing salaries within each department) -- confirm, since the
    function name says "reshape" rather than "normalize".
    """
    pass

In [40]:
def custom_functions_on_movie():
    """Placeholder; not implemented yet (does nothing and returns ``None``).

    TODO: presumably meant for the "Custom Function on Movies" task
    (classifying movies as ``Short``/``Medium``/``Long`` by duration)
    -- confirm.
    """
    pass

#### **Using `pipe`**
1. **Pipeline on Titanic**
   - Create a pipeline to:
     - Filter passengers who survived (`Survived == 1`).
     - Fill missing `Age` values with the mean.
     - Create a new column, `Fare_Per_Age`, by dividing `Fare` by `Age`.

2. **Pipeline on Flights**
   - Create a pipeline to:
     - Filter flights with a departure delay greater than 30 minutes.
     - Add a column `Delay_Per_Hour` by dividing the delay by the scheduled flight duration.

In [41]:
def main():
    """Entry-point placeholder; currently does nothing and returns ``None``.

    TODO: wire the loaders and task functions together here once they are
    implemented.
    """
    pass

In [42]:
# Call main() only when this code runs as the top-level module.
if __name__ == '__main__':
    main()