<a href="https://colab.research.google.com/github/cedamusk/AI-N-ML/blob/Tools/Advanced_Pandas.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Dict, List, Union
import logging
from functools import wraps
import time

In [13]:
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s-%(levelname)-%(message)s'
)

In [14]:
def timer_decorator(func):
  @wraps(func)
  def wrapper(*args, **kwargs):
    start_time=time.time()
    result=func(*args, **kwargs)
    end_time=time.time()
    logging.info(f'{func.__name__}took{end_time-start_time:.2f} seconds to execute')
    return result
  return wrapper

In [15]:
def setup_visualization_style():
  plt.style.use('seaborn')
  sns.set_palette('viridis')
  plt.rcParams['figure.figsize']=(10, 6)
  plt.rcParams['axes.grid']=True

In [16]:
class DataAnalyzer:
  def __init__(self):
    setup_visualization_style()

  @timer_decorator
  def create_multi_index_df(self, data: Dict)-> pd.DataFrame:
    try:
      df=pd.DataFrame(data)
      if not all (col in df.columns for col in ['Region', 'Product', 'Sales']):
        raise ValueError('Required columns missing fromm data')
      df.set_index(['Region', 'Product'], inplace=True)
      return df
    except Exception as e:
      logging.error(f"Error creating multi-index DataFrame: {str(e)}")
      raise

  @timer_decorator
  def merge_dataframes(self, left_df: pd.DataFrame, right_df: pd.DataFrame,
                       on_column: str, how: str='outer') -> pd.DataFrame:
      try:
        if on_column not in left_df.columns or on_column not in right_df.columns:
          raise ValueError(f"Merge column'{on_column}' not found in both DataFrames")
        return pd.merge(left_df, right_df, on=on_column, how=how)
      except Exception as e:
        logging.error(f"Error merging DataFrames: {str(e)}")
        raise

  @timer_decorator
  def create_pivot_analysis(self, df: pd.DataFrame)-> Dict[str, pd.DataFrame]:
    try:
      df_reset=df.reset_index()
      pivot_table=pd.pivot_table(
          data=df_reset,
          values='Sales',
          index='Region',
          columns='Product',
          aggfunc=['sum', 'mean'],
          margins=True
      )

      crosstab=pd.crosstab(
          df_reset['Region'],
          df_reset['Product'],
          values=df_reset['Sales'],
          aggfunc='sum',
          margins=True
      )

      return {
          'pivot_table': pivot_table,
          'crosstab': crosstab
      }

    except Exception as e:
      logging.error(f"Error creating pivot analysis: {str(e)}")
      raise

  @timer_decorator
  def process_time_series(self, start_date: str, periods: int,
                          freq: str='D')-> pd.DataFrame:
      try:
        dates=pd.date_range(start_date, periods=periods, freq=freq)
        df=pd.DataFrame({
            'Date': dates,
            'Value': np.random.randn(len(dates)).cumsum(),
            'Trend': np.linspace(0, 100, len(dates))
        }).set_index('Date')

        df['Rolling_Mean']=df['Value'].rolling(window=3).mean()
        df['Rolling_Std']=df['Value'].rolling(window=3).std()

        return df
      except Exception as e:
        logging.error(f"Error processing time series: {str(e)}")
        raise

  @staticmethod
  def clean_dataframe(df: pd.DataFrame,
                      numeric_fill_strategy: str='mean',
                      categorical_fill_value: str='missing')-> pd.DataFrame:
      try:
        df_cleaned=df.copy()

        numeric_cols=df_cleaned.select_dtypes(include=[np.number]).columns
        for col in numeric_cols:
          if numeric_fill_strategy=='mean':
            fill_value=df_cleaned[col].mean()
          elif numeric_fill_strategy=='median':
            fill_value=df_cleaned[col].median()
          else:
            fill_value=0
          df_cleaned[col].fillna(fill_value, inplace=True)

          categorical_cols=df_cleaned.select_dtypes(include=['object']).columns
          for col in categorical_cols:
            df_cleaned[col].fillna(categorical_fill_value, inplace=True)

          return df_cleaned

      except Exception as e:
        logging.error(f"Error cleaning DataFrame: {str(e)}")
        raise

  def visualize_data(self, df: pd.DataFrame, time_series_df: pd.DataFrame=None):
    try:
      fig=plt.figure(figsize=(15, 10))

      plt.subplot(2,2,1)
      sns.barplot(data=df.reset_index(), x='Region', y='Sales', hue='Product')
      plt.title("Sales by Region and Product")
      plt.xticks(rotation=45)

      plt.subplot(2, 2, 2)
      sns.boxplot(data=df.reset_index(), x='Region', y='Sales')
      plt.title("Sales Distribution by Region")
      plt.xticks(rotation=45)

      if time_series_df is not None:
        plt.subplot(2, 2, 3)
        time_series_df["Value"].plot(title='Time Series')
        plt.title("Time Series Analysis")

        plt.subplot(2, 2, 4)
        sns.heatmap(time_series_df.corr(), annot=True, cmap='coolwarm')
        plt.title("Correlation Heatmap")

      plt.tight_layout()
      plt.show()

    except Exception as e:
      logging.error(f"Error creating visualizations: {str(e)}")
      raise


In [17]:
def main():
  main()

  try:
    analyzer=DataAnalyzer()

    data={
        'Region': ['North', 'North', 'South', 'South', 'East', 'East'],
        'Product': ['A', 'B', 'A', 'B', 'A', 'B'],
        "Sales":[234, 123, 345, 231, 543, 654]
    }

    df=analyzer.create_multi_index_df(data)
    print('MultiIndexed DataFrame:')
    print(df)

    time_series_df=analyzer.process_time_series('20230101', 10)
    print("\nTime Series Analysis:")
    print(time_series_df)

    pivot_analysis=analyzer.create_pivot_analysis(df)
    print("\nPivot Analysis:")
    print(pivot_analysis['pivot_table'])

  except Exception as e:
    logging.error(f"Error in main execution: {str(e)}")
    raise

In [18]:
if __name__=="__main__":
  main()

RecursionError: maximum recursion depth exceeded