# Initialization Notebook

In [None]:

%%capture
!pip install pandas numpy matplotlib seaborn scikit-learn
statsmodels scipy joblib plotly

### Libraries have been installed successfully.

In [None]:

import os
import sys
import time


### General libraries imported.

In [None]:

import pandas as pd
import numpy as np


In [None]:
import statsmodels.api as sm
from scipy import stats

### Analytic libraries imported.

In [None]:
import plotly.express as px

In [None]:

import matplotlib.pyplot as plt
import seaborn as sns


### Visualization libraries imported.

In [None]:

# Working directory of the kernel at the moment this cell runs; used below as
# the base directory when building file paths.
ROOT_DIR = os.getcwd()


In [None]:

# Small demo frame: 5 rows, float columns 'A' and 'B' with values in [0, 1).
# A fixed seed keeps the sample identical after every Restart & Run All.
SAMPLE_SEED = 42
DATAFRAME_SAMPLE = pd.DataFrame(
    np.random.default_rng(SAMPLE_SEED).random((5, 2)),
    columns=['A', 'B'],
)


In [None]:

# sns.set_style() applies the style globally and returns None, so assigning its
# result would leave PLOT_STYLE empty. Keep the style name in the constant and
# apply it with a separate call.
PLOT_STYLE = "whitegrid"
sns.set_style(PLOT_STYLE)


In [None]:
# Path used to save or load models with joblib.
# (Previously a second assignment replaced this path with its own description
# text, so the constant no longer pointed at a file.)
SAVED_MODEL_PATH = os.path.join(ROOT_DIR, "saved_model.pkl")

In [None]:

from IPython.display import display

# Overview table: one (category, variable name) row per constant defined so far.
variables_summary = pd.DataFrame(
    [
        ('General', 'ROOT_DIR'),
        ('Analytics', 'DATAFRAME_SAMPLE'),
        ('Visual', 'PLOT_STYLE'),
    ],
    columns=['Category', 'Variables'],
)
display(variables_summary)


In [None]:

# Reference of the constants and helper functions this notebook provides.
# 'Variables' maps a constant name to a one-line description.
# 'Functions' maps a function name to a description plus an example call,
# stored as a string; every example is written to run once the whole
# notebook has been executed.
help_dict = {
    'Variables': {
        'ROOT_DIR': 'Root directory of the current notebook.',
        'DATAFRAME_SAMPLE': 'Sample DataFrame with random data.',
        'PLOT_STYLE': 'Style settings for Seaborn plots.',
        'TODAY': "Today's date.",
        'DATE_STR': "Today's date as a string.",
        'CPU_CORES': 'Number of available CPU cores.'
    },
    'Functions': {
        'to_integer': {
            'description': 'Convert DataFrame column to integer.',
            'example': 'to_integer(DATAFRAME_SAMPLE["A"])'
        },
        'to_float': {
            'description': 'Convert DataFrame column to float.',
            'example': 'to_float(DATAFRAME_SAMPLE["A"])'
        },
        'to_string': {
            'description': 'Convert DataFrame column to string.',
            'example': 'to_string(DATAFRAME_SAMPLE["A"])'
        },
        'to_lowercase': {
            'description': 'Convert DataFrame column strings to lowercase.',
            'example': 'to_lowercase(DATAFRAME_SAMPLE["A"].astype(str))'
        },
        'to_uppercase': {
            'description': 'Convert DataFrame column strings to uppercase.',
            'example': 'to_uppercase(DATAFRAME_SAMPLE["A"].astype(str))'
        },
        'to_datetime': {
            'description': 'Convert DataFrame column string to datetime.',
            # The input must match the default '%Y-%m-%d' format; stringified
            # random floats do not, so use a real date string here.
            'example': 'to_datetime(pd.Series(["2024-01-31"]))'
        },
        'datetime_to_string': {
            'description': 'Convert DataFrame column datetime to string.',
            # Wrap TODAY in a Series so the column-oriented `.dt` accessor applies.
            'example': 'datetime_to_string(pd.Series([TODAY]))'
        },
        'to_boolean': {
            'description': 'Convert DataFrame column to boolean.',
            'example': 'to_boolean(DATAFRAME_SAMPLE["A"] > 0.5)'
        },
        'to_category': {
            'description': 'Convert DataFrame column to category datatype.',
            'example': 'to_category(DATAFRAME_SAMPLE["A"].astype(str))'
        },
        'fillna_with_default': {
            'description': 'Fill NaN values in DataFrame column with a default value.',
            'example': 'fillna_with_default(DATAFRAME_SAMPLE["A"], default_value=0)'
        },
        'simple_plot': {
            'description': 'Draw a line plot of y against x with a title.',
            'example': 'simple_plot(DATAFRAME_SAMPLE["A"], DATAFRAME_SAMPLE["B"])'
        },
        'profile_function': {
            'description': 'Run a callable under cProfile, print the stats and return its result.',
            'example': 'profile_function(lambda: sum(range(1000)))'
        }
    }
}



### The configuration constants and the help dictionary are set up. The utility definitions follow below.

In [None]:

from datetime import datetime, timedelta


In [None]:

# Take the current local timestamp once so that both constants describe the
# same moment.
TODAY = datetime.today()
# ISO-style calendar date (year-month-day) rendered from TODAY.
DATE_STR = format(TODAY, '%Y-%m-%d')


### Date and Time utilities have been set up.

In [None]:

def to_integer(df_column: pd.Series) -> pd.Series:
    """Cast a column to integer dtype using ``astype(int)``.

    Note: ``to_integer`` is defined again in the full conversion-utilities
    cell further down; after a top-to-bottom run that later definition is the
    one in effect.

    Args:
        df_column: Column whose values should become integers.

    Returns:
        The result of ``df_column.astype(int)``.
    """
    as_int = df_column.astype(dtype=int)
    return as_int

def to_string(df_column: pd.Series) -> pd.Series:
    """Cast a column to strings using ``astype(str)``.

    Note: ``to_string`` is defined again in the full conversion-utilities
    cell further down; after a top-to-bottom run that later definition is the
    one in effect.

    Args:
        df_column: Column whose values should become strings.

    Returns:
        The result of ``df_column.astype(str)``.
    """
    as_text = df_column.astype(dtype=str)
    return as_text


### Conversion utilities have been defined.

In [None]:

# Numeric Conversions
def to_integer(df_column: pd.Series) -> pd.Series:
    """Cast a column to integer dtype using ``astype(int)``.

    Args:
        df_column: Column whose values should become integers.

    Returns:
        The result of ``df_column.astype(int)``.
    """
    as_int = df_column.astype(dtype=int)
    return as_int

def to_float(df_column: pd.Series) -> pd.Series:
    """Cast a column to float dtype using ``astype(float)``.

    Args:
        df_column: Column whose values should become floats.

    Returns:
        The result of ``df_column.astype(float)``.
    """
    as_float = df_column.astype(dtype=float)
    return as_float

# String Conversions
def to_string(df_column: pd.Series) -> pd.Series:
    """Cast a column to strings using ``astype(str)``.

    Args:
        df_column: Column whose values should become strings.

    Returns:
        The result of ``df_column.astype(str)``.
    """
    as_text = df_column.astype(dtype=str)
    return as_text

def to_lowercase(df_column: pd.Series) -> pd.Series:
    """Lowercase every string in a column via the ``.str`` accessor.

    Args:
        df_column: Column of strings (the ``.str`` accessor must be available).

    Returns:
        The result of ``df_column.str.lower()``.
    """
    lowered = df_column.str.lower()
    return lowered

def to_uppercase(df_column: pd.Series) -> pd.Series:
    """Uppercase every string in a column via the ``.str`` accessor.

    Args:
        df_column: Column of strings (the ``.str`` accessor must be available).

    Returns:
        The result of ``df_column.str.upper()``.
    """
    raised = df_column.str.upper()
    return raised

# Datetime Conversions
def to_datetime(df_column: pd.Series, format: str = '%Y-%m-%d') -> pd.Series:
    """Parse a column into datetimes with ``pd.to_datetime``.

    Args:
        df_column: Column of values to parse.
        format: strftime-style pattern handed to ``pd.to_datetime``;
            defaults to year-month-day.

    Returns:
        The result of ``pd.to_datetime(df_column, format=format)``.
    """
    parsed = pd.to_datetime(df_column, format=format)
    return parsed

def datetime_to_string(df_column, format='%Y-%m-%d'):
    """Format datetime values as strings.

    Accepts a datetime Series, as before, and additionally any object that
    exposes ``strftime`` itself, such as a single ``datetime`` or
    ``pd.Timestamp``. This makes calls like ``datetime_to_string(TODAY)`` work.

    Args:
        df_column: A datetime Series, or a single datetime-like value.
        format: strftime pattern; defaults to year-month-day.

    Returns:
        A Series of strings for Series input; otherwise whatever the value's
        own ``strftime(format)`` returns (a ``str`` for a single datetime).

    Raises:
        AttributeError: If a Series is not datetime-like, or a non-Series
            value has no ``strftime`` method.
    """
    if isinstance(df_column, pd.Series):
        # Series expose strftime only through the .dt accessor.
        return df_column.dt.strftime(format)
    # Scalars (datetime, Timestamp) provide strftime directly.
    return df_column.strftime(format)

# Boolean Conversions
def to_boolean(df_column: pd.Series) -> pd.Series:
    """Cast a column to boolean dtype using ``astype(bool)``.

    Args:
        df_column: Column whose values should become booleans.

    Returns:
        The result of ``df_column.astype(bool)``.
    """
    as_flags = df_column.astype(dtype=bool)
    return as_flags

# Category Conversions
def to_category(df_column: pd.Series) -> pd.Series:
    """Cast a column to the pandas ``category`` dtype.

    Args:
        df_column: Column whose values should become categorical.

    Returns:
        The result of ``df_column.astype('category')``.
    """
    as_categorical = df_column.astype(dtype='category')
    return as_categorical

# Handling Missing Data
def fillna_with_default(df_column: pd.Series, default_value=0) -> pd.Series:
    """Replace missing values in a column with a default.

    Args:
        df_column: Column that may contain missing values.
        default_value: Replacement passed to ``fillna``; defaults to 0.

    Returns:
        The result of ``df_column.fillna(default_value)``.
    """
    filled = df_column.fillna(value=default_value)
    return filled


In [None]:

def simple_plot(x, y, title="Simple Plot"):
    """Draw a single line chart of ``y`` against ``x`` and show it.

    Args:
        x: Values for the horizontal axis.
        y: Values for the vertical axis.
        title: Text placed above the axes.
    """
    # New 10x6 inch figure holding a single set of axes.
    _, axes = plt.subplots(figsize=(10, 6))
    axes.plot(x, y)
    axes.set_title(title)
    plt.show()


### Plotting utilities have been defined.

In [None]:

from multiprocessing import Pool, cpu_count


In [None]:

# Number of CPUs reported by multiprocessing.cpu_count() on this machine.
CPU_CORES = cpu_count()


### Parallel processing libraries and settings imported.

In [None]:

import cProfile


In [None]:

def profile_function(func, *args, **kwargs):
    """Run ``func`` under cProfile, print the stats, and return its result.

    Args:
        func: Callable to profile.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func(*args, **kwargs)`` returns.

    Raises:
        Any exception raised by ``func`` propagates unchanged; no stats are
        printed in that case.
    """
    profiler = cProfile.Profile()
    # runcall() wraps the call in enable()/disable() with try/finally, so the
    # profiler is always switched off again, even when ``func`` raises.
    result = profiler.runcall(func, *args, **kwargs)
    profiler.print_stats(sort='time')
    return result


In [None]:
import joblib

In [None]:

# Final overview table: one (category, variable names) row per group of
# constants defined by this notebook.
variables_summary = pd.DataFrame(
    [
        ('General', 'ROOT_DIR'),
        ('Analytics', 'DATAFRAME_SAMPLE'),
        ('Visual', 'PLOT_STYLE'),
        ('Date & Time', 'TODAY, DATE_STR'),
        ('Parallel Processing', 'CPU_CORES'),
    ],
    columns=['Category', 'Variables'],
)
display(variables_summary)


In [None]:

import json
import re


### Additional useful libraries imported.

### Congratulations! The notebook has finished running. Inspect `help_dict` to see the list of available utilities.

### Congratulations! 🎉

Your initialization notebook has run successfully. You're all set up and ready to go!