In [6]:
import gradio as gr
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error, mean_squared_error
from datetime import datetime, timedelta
import joblib
import logging
import os

# Set up logging
logging.basicConfig(level=logging.INFO,
                   format='%(asctime)s - %(levelname)s - %(message)s')

def check_data_requirements(df):
    """
    Validate the input data meets all requirements
    """
    required_columns = ['Time of Call', 'API Code']
    missing_columns = [col for col in required_columns if col not in df.columns]

    if missing_columns:
        raise ValueError(f"Missing required columns: {missing_columns}")

    if df.empty:
        raise ValueError("DataFrame is empty")

    return True

def preprocess_and_train(api_file, api_code):
    """
    Preprocess data and train ARIMA model with extended error handling
    """
    logging.info(f"Starting preprocessing for {api_code}")

    try:
        # Check if file exists
        if not os.path.exists(api_file):
            raise FileNotFoundError(f"File not found: {api_file}")

        # Load data with explicit encoding and error handling
        try:
            api_data = pd.read_csv(api_file, encoding='utf-8')
        except UnicodeDecodeError:
            api_data = pd.read_csv(api_file, encoding='latin1')

        logging.info(f"Successfully loaded data for {api_code}")

        # Validate data
        check_data_requirements(api_data)

        # Convert to datetime with error handling
        try:
            api_data['Time of Call'] = pd.to_datetime(api_data['Time of Call'])
        except Exception as e:
            logging.error(f"DateTime conversion failed. Sample date format: {api_data['Time of Call'].iloc[0]}")
            raise ValueError(f"DateTime conversion failed: {str(e)}")

        api_data.set_index('Time of Call', inplace=True)

        # Resample data
        daily_calls = api_data['API Code'].resample('D').count()

        if len(daily_calls) < 10:  # Minimum data requirement
            raise ValueError("Insufficient data points for prediction")

        # Split data
        train_size = int(len(daily_calls) * 0.8)
        train, test = daily_calls[:train_size], daily_calls[train_size:]

        logging.info(f"Training ARIMA model for {api_code}")

        # Train ARIMA model
        arima_model = ARIMA(train, order=(5,1,0))
        fitted_model = arima_model.fit()

        # Make predictions for test set
        arima_forecast = fitted_model.forecast(steps=len(test))

        # Calculate metrics
        mae = mean_absolute_error(test, arima_forecast)
        mse = mean_squared_error(test, arima_forecast)

        # Save model
        model_filename = f"{api_code}_best_model.pkl"
        joblib.dump(fitted_model, model_filename)
        logging.info(f"Model saved as {model_filename}")

        return {
            'success': True,
            'metrics': {
                'MAE': mae,
                'MSE': mse
            }
        }

    except Exception as e:
        logging.error(f"Error processing {api_code}: {str(e)}")
        return {
            'success': False,
            'error': str(e)
        }

def predict_api_calls(api_choice, days):
    """
    Make predictions with enhanced error handling
    """
    logging.info(f"Making predictions for {api_choice} for {days} days")

    try:
        model_path = f"{api_choice}_best_model.pkl"

        if not os.path.exists(model_path):
            return None, f"Error: Model file not found for {api_choice}"

        model = joblib.load(model_path)

        future_dates = pd.date_range(
            start=datetime.today(),
            periods=days,
            freq='D'
        )

        predictions = model.forecast(steps=days)

        # Ensure predictions are non-negative
        predictions = np.maximum(predictions, 0)

        results = pd.DataFrame({
            "Date": future_dates,
            "Predicted_Calls": predictions.round(0)  # Round to whole numbers
        })

        # Create plot
        import matplotlib.pyplot as plt
        plt.figure(figsize=(10, 6))
        plt.plot(results["Date"], results["Predicted_Calls"])
        plt.title(f"{api_choice} API Call Predictions")
        plt.xlabel("Date")
        plt.ylabel("Predicted Calls")
        plt.xticks(rotation=45)
        plt.tight_layout()

        return plt.gcf(), results.to_string()

    except Exception as e:
        logging.error(f"Prediction error: {str(e)}")
        return None, f"Error during prediction: {str(e)}"

def main():
    """
    Main function with sample data handling
    """
    # Check if sample data exists, if not, create it
    sample_data = {
        'A9.csv': pd.DataFrame({
            'Time of Call': pd.date_range(start='2023-01-01', periods=100, freq='H'),
            'API Code': np.random.randint(1, 100, 100)
        }),
        'A2.csv': pd.DataFrame({
            'Time of Call': pd.date_range(start='2023-01-01', periods=100, freq='H'),
            'API Code': np.random.randint(1, 100, 100)
        }),
        'A7.csv': pd.DataFrame({
            'Time of Call': pd.date_range(start='2023-01-01', periods=100, freq='H'),
            'API Code': np.random.randint(1, 100, 100)
        })
    }

    # Save sample data if real data doesn't exist
    for file_name, data in sample_data.items():
        if not os.path.exists(file_name):
            logging.info(f"Creating sample data file: {file_name}")
            data.to_csv(file_name, index=False)

    # Train models
    api_files = {'A9.csv': 'A9', 'A2.csv': 'A2', 'A7.csv': 'A7'}

    for file, api_code in api_files.items():
        result = preprocess_and_train(file, api_code)
        if result['success']:
            logging.info(f"Successfully trained model for {api_code}")
        else:
            logging.error(f"Failed to train model for {api_code}: {result['error']}")

    # Create Gradio interface
    interface = gr.Interface(
        fn=predict_api_calls,
        inputs=[
            gr.Dropdown(choices=["A9", "A2", "A7"], label="Choose API"),
            gr.Slider(minimum=1, maximum=30, step=1, value=7, label="Days to Predict")
        ],
        outputs=[
            gr.Plot(label="Prediction Plot"),
            gr.Textbox(label="Detailed Results", lines=10)
        ],
        title="API Call Prediction",
        description="Predict API calls for the next N days"
    )

    interface.launch(share=True)

if __name__ == "__main__":
    main()

  'Time of Call': pd.date_range(start='2023-01-01', periods=100, freq='H'),
  'Time of Call': pd.date_range(start='2023-01-01', periods=100, freq='H'),
  'Time of Call': pd.date_range(start='2023-01-01', periods=100, freq='H'),
ERROR:root:DateTime conversion failed. Sample date format: 01-02-2025 07:21
ERROR:root:Error processing A9: DateTime conversion failed: time data "13-02-2025 00:40" doesn't match format "%m-%d-%Y %H:%M", at position 182. You might want to try:
    - passing `format` if your strings have a consistent format;
    - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;
    - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this.
ERROR:root:Failed to train model for A9: DateTime conversion failed: time data "13-02-2025 00:40" doesn't match format "%m-%d-%Y %H:%M", at position 182. You might want to try:
    - passing `format` if you

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://d6ec4a974eb03a92a6.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
