<a href="https://colab.research.google.com/github/ZainabSalama12/GP/blob/main/GP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
#------------------------------------preprocessing and statistics --------------------------
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import stats


def types_splitting(data):
    """Split the column names of ``data`` by dtype.

    Columns of dtype ``int64`` or ``float64`` are reported as numerical and
    columns of dtype ``object`` as categorical; columns of any other dtype
    appear in neither list.

    Returns:
        A ``(nums, cats)`` tuple of column-name lists, each in frame order.
    """
    numeric_frame = data.select_dtypes(include=['int64', 'float64'])
    object_frame = data.select_dtypes(include=['object'])
    return numeric_frame.columns.tolist(), object_frame.columns.tolist()


def drop_ids(data):
    """Drop identifier-like columns from ``data`` in place.

    A column is treated as an identifier when ``id`` appears in its name as a
    separate token: the whole name (``id``, ``ID``), delimited by a non-letter
    such as ``_`` or a space (``user_id``, ``id_number``, ``order id``), or as
    a camel-case part (``PassengerId``, ``customerID``).

    A plain substring test is deliberately avoided, because it would also
    remove unrelated columns such as ``width``, ``paid`` or ``Holiday``.

    Args:
        data: DataFrame to modify. Column labels are converted with ``str``
            before matching, so non-string labels are supported.

    Returns:
        None. ``data`` is modified in place.
    """
    import re

    id_token = re.compile(
        r"(?i:(?:^|[\W_])id(?:$|[\W_]))"  # "id" delimited by start/end/non-alphanumerics
        r"|[a-z](?:Id|ID)(?![a-z])"       # camel-case "...Id" / "...ID"
    )
    # Collect the names first so the frame is not mutated while being scanned.
    id_columns = [col for col in data.columns if id_token.search(str(col))]
    data.drop(columns=id_columns, inplace=True)


def handling_duplicates(data):
    """Remove fully repeated rows from ``data`` in place.

    The first occurrence of each row is kept and the surviving rows keep
    their original index labels. Returns None.
    """
    data.drop_duplicates(subset=None, keep='first', inplace=True)


def cats_encoding(data, cats):
    """Label-encode the categorical columns of ``data`` in place.

    Each column named in ``cats`` is overwritten with the output of a freshly
    fitted ``LabelEncoder``, so the encodings of different columns are
    independent of one another. Returns None.
    """
    for column in cats:
        data[column] = LabelEncoder().fit_transform(data[column])


def handling_nulls(data, nums, cats):
    """Drop mostly-empty columns and impute the remaining nulls, in place.

    Columns in which more than half of the values are null are dropped from
    ``data``. Remaining numerical columns are filled with their median and
    remaining categorical columns with their most frequent value.

    Args:
        data: DataFrame to clean; modified in place.
        nums: Names of the numerical columns. When this is a ``list`` it is
            updated in place so the caller no longer sees dropped columns.
        cats: Names of the categorical columns; treated like ``nums``.

    Returns:
        A ``(nums, cats)`` tuple with the column names that survived.
    """
    null_share = data.isnull().mean()
    sparse_columns = null_share[null_share > 0.5].index
    dropped = set(sparse_columns)

    kept_nums = [col for col in nums if col not in dropped]
    kept_cats = [col for col in cats if col not in dropped]
    # Publish the filtered names to the caller: rebinding the parameters alone
    # would leave the caller holding names of columns that no longer exist.
    if isinstance(nums, list):
        nums[:] = kept_nums
    if isinstance(cats, list):
        cats[:] = kept_cats

    data.drop(columns=sparse_columns, inplace=True)

    for col in kept_nums:
        data[col] = data[col].fillna(data[col].median())
    for col in kept_cats:
        modes = data[col].mode()
        # mode() is empty when the column holds no non-null value (e.g. an
        # empty frame); there is nothing to impute with in that case.
        if not modes.empty:
            data[col] = data[col].fillna(modes.iloc[0])

    return kept_nums, kept_cats


def normalize(data, nums):
    """Rescale the numerical columns of ``data`` to the range [0, 1] in place.

    All columns named in ``nums`` are passed to a single ``MinMaxScaler`` as a
    two-dimensional frame and the scaled values are written back into
    ``data``.

    Args:
        data: DataFrame to modify in place.
        nums: Names of the numerical columns to scale.

    Returns:
        None. Nothing is done when ``nums`` is empty or ``data`` has no rows.
    """
    columns = list(nums)
    if not columns or data.empty:
        return
    scaler = MinMaxScaler(feature_range=(0, 1))
    # fit_transform needs 2-D input; selecting with a list keeps the frame
    # 2-D, and the result must be assigned back or the scaling is lost.
    data[columns] = scaler.fit_transform(data[columns])


def outlire(data):
    """Return ``data`` without the rows that contain an IQR outlier.

    For every numeric column the interquartile range ``IQR = Q3 - Q1`` is
    computed; a row is discarded when any of its numeric values lies below
    ``Q1 - 1.5 * IQR`` or above ``Q3 + 1.5 * IQR``. Non-numeric columns are
    ignored by the test but kept in the result.

    Args:
        data: DataFrame to filter. It is not modified.

    Returns:
        A new DataFrame holding the rows of ``data`` that have no outlier.
    """
    numeric = data.select_dtypes(include='number')
    if numeric.empty:
        # No numeric values to test, so no row can be an outlier.
        return data.copy()

    q1 = numeric.quantile(q=.25)
    q3 = numeric.quantile(q=.75)
    iqr = q3 - q1
    lower_fence = q1 - 1.5 * iqr
    upper_fence = q3 + 1.5 * iqr

    has_outlier = ((numeric < lower_fence) | (numeric > upper_fence)).any(axis=1)
    return data[~has_outlier]


def wrong_format(data):
    """Expand every datetime column of ``data`` into component columns.

    For each datetime column ``c`` the columns ``c_time``, ``c_hour``,
    ``c_minute``, ``c_second``, ``c_day``, ``c_month`` and ``c_year`` are
    added to ``data`` in that order. The clock components are read from the
    ``c_time`` values, the calendar components from the original column.
    ``data`` is modified in place and nothing is returned.
    """
    for col in data.select_dtypes(include=['datetime']).columns.tolist():
        stamps = data[col]
        data[col + '_time'] = stamps.dt.time
        clock = data[col + '_time']
        for part in ('hour', 'minute', 'second'):
            data[col + '_' + part] = clock.apply(lambda t, part=part: getattr(t, part))
        for part in ('day', 'month', 'year'):
            data[col + '_' + part] = getattr(stamps.dt, part)


def preprocessing(data):
    """Run the cleaning pipeline on ``data`` and collect summary statistics.

    Steps, in order: drop identifier columns, split the columns by type,
    handle nulls, drop duplicate rows, compute numerical and categorical
    statistics, then label-encode the categorical columns. ``data`` is
    modified in place by these steps.

    Args:
        data: DataFrame to preprocess.

    Returns:
        A ``(numerical_statistics, categorical_statistics, data)`` tuple,
        where the statistics are computed before the categorical encoding.
    """
    drop_ids(data)
    numerical, categorical = types_splitting(data)
    handling_nulls(data, numerical, categorical)
    # handling_nulls may drop mostly-empty columns; keep only the names that
    # still exist so the steps below do not fail on a missing column.
    remaining = set(data.columns)
    numerical = [col for col in numerical if col in remaining]
    categorical = [col for col in categorical if col in remaining]
    handling_duplicates(data)
    numerical_statistics = num_stat(data, numerical)
    categorical_statistics = cat_stat(data, categorical)
    cats_encoding(data, categorical)
    return numerical_statistics, categorical_statistics, data


def cat_stat(data, cats):
    """Compute the percentage share of every value in each categorical column.

    Returns:
        A dict mapping each column name in ``cats`` to a Series indexed by
        the column's values, holding strings such as ``'75.0%'`` (the
        percentage rounded to two decimals).
    """
    def share_of_values(column):
        percentages = data[column].value_counts(normalize=True) * 100
        return percentages.round(2).astype(str) + '%'

    return {column: share_of_values(column) for column in cats}


def num_stat(data, nums):
    """Compute the average, minimum and maximum of each numerical column.

    The values are converted to built-in Python numbers. pandas returns NumPy
    scalars, and ``numpy.int64`` in particular is not a built-in ``int``, so
    it cannot be serialized when the statistics are returned as JSON.

    Args:
        data: DataFrame holding the columns.
        nums: Names of the numerical columns to summarize.

    Returns:
        A dict mapping each column name to a dict with the keys
        ``'Average'``, ``'Min'`` and ``'Max'``.
    """
    def to_builtin(value):
        # NumPy scalars expose .item(); anything else is passed through.
        return value.item() if hasattr(value, "item") else value

    stat = {}
    for col in nums:
        series = data[col]
        stat[col] = {
            'Average': to_builtin(series.mean()),
            'Min': to_builtin(series.min()),
            'Max': to_builtin(series.max()),
        }
    return stat


In [5]:
#---------------------------------visualizations---------------------------------
import pandas as pd
import plotly
import plotly.express as px
import json
# ------------------------------ Histogram ------------------------------
def create_histogram(data, x, dist=None, color=None, bins=50, func='count', norm=''):
    """Build a Plotly Express histogram of ``x`` and return the figure.

    ``dist`` is forwarded as the ``color`` grouping, ``color`` as
    ``color_discrete_sequence``, ``bins`` as ``nbins``, ``func`` as
    ``histfunc`` and ``norm`` as ``histnorm``.
    """
    options = dict(
        data_frame=data,
        x=x,
        color=dist,
        color_discrete_sequence=color,
        nbins=bins,
        histfunc=func,
        histnorm=norm,
    )
    return px.histogram(**options)
# ------------------------------ Line chart ------------------------------
def create_linechart(data, x, y, dist=None, shape='linear', color=None):
    """Build a Plotly Express line chart of ``y`` against ``x`` with markers.

    ``dist`` is forwarded as the ``color`` grouping, ``shape`` as
    ``line_shape`` and ``color`` as ``color_discrete_sequence``.
    """
    options = dict(
        data_frame=data,
        x=x,
        y=y,
        color=dist,
        markers=True,
        line_shape=shape,
        color_discrete_sequence=color,
    )
    return px.line(**options)
# ------------------------------ Bar chart ------------------------------
def create_barchart(data, x, y, dist=None, mode=None, color=None):
    """Build a Plotly Express bar chart of ``y`` against ``x``.

    ``dist`` is forwarded as the ``color`` grouping, ``mode`` as ``barmode``
    and ``color`` as ``color_discrete_sequence``.
    """
    options = dict(
        data_frame=data,
        x=x,
        y=y,
        color=dist,
        barmode=mode,
        color_discrete_sequence=color,
    )
    return px.bar(**options)
# ------------------------------ Box Plot ------------------------------
def create_boxplot(data, y, dist=None, points=None, color=None):
    """Build a Plotly Express box plot of ``y`` and return the figure.

    ``dist`` is used as the x-axis grouping, ``points`` is forwarded
    unchanged and ``color`` is passed as ``color_discrete_sequence``.
    """
    options = dict(
        data_frame=data,
        y=y,
        x=dist,
        points=points,
        color_discrete_sequence=color,
    )
    return px.box(**options)
# ------------------------------ Pie Chart ------------------------------
def create_pie_chart(data, values, names, color=None):
    """Build a Plotly Express pie chart from ``values`` and ``names``.

    ``color`` is forwarded as ``color_discrete_sequence``.
    """
    options = dict(
        data_frame=data,
        values=values,
        names=names,
        color_discrete_sequence=color,
    )
    return px.pie(**options)
# ------------------------------ Scatter Plot ------------------------------
def create_scatter_plot(data, x, y, dist=None, size=None, symbol=None, color=None):
    """Build a Plotly Express scatter plot of ``y`` against ``x``.

    ``dist`` is forwarded as the ``color`` grouping, ``size`` and ``symbol``
    are forwarded unchanged and ``color`` is passed as
    ``color_discrete_sequence``.
    """
    options = dict(
        data_frame=data,
        x=x,
        y=y,
        color=dist,
        size=size,
        symbol=symbol,
        color_discrete_sequence=color,
    )
    return px.scatter(**options)
# ------------------------------ Labels ------------------------------
def update_labels(fig, x_label, y_label):
    """Set the x- and y-axis titles of ``fig`` and return the same figure."""
    axis_titles = {'xaxis_title_text': x_label, 'yaxis_title_text': y_label}
    fig.update_layout(**axis_titles)
    return fig
# ------------------------------ Title ------------------------------
def update_title(fig, title):
    """Set the layout title of ``fig`` and return the same figure."""
    layout_changes = {'title': title}
    fig.update_layout(**layout_changes)
    return fig
# ------------------------------ Json ------------------------------
def to_json(fig):
    """Serialize ``fig`` to a JSON string using Plotly's JSON encoder."""
    encoder = plotly.utils.PlotlyJSONEncoder
    serialized = json.dumps(fig, cls=encoder)
    return serialized

In [None]:
!pip install fastapi
!pip install python-multipart
from fastapi.encoders import jsonable_encoder
from fastapi import FastAPI
from fastapi import FastAPI, UploadFile, File, Form
import json
import plotly.express as px
import io
# --------------------------------------------preprocessing_encoding------------------
app = FastAPI()


@app.post("/preprocessing")
async def preprocessing_endpoint(file: UploadFile = File(...)):
    """Run the preprocessing pipeline on an uploaded CSV file.

    The upload is parsed with ``pandas.read_csv`` and handed to
    ``preprocessing``. The response carries the numerical statistics, the
    categorical statistics and the preprocessed rows as a list of records.
    """
    frame = pd.read_csv(file.file)
    numerical_summary, categorical_summary, cleaned = preprocessing(frame)
    response = {
        "numerical_statistics": numerical_summary,
        "categorical_statistics": categorical_summary,
        "preprocessed_data": cleaned.to_dict(orient="records"),
    }
    return response

#------------------------------------------types_splitting_encoding------------------------
@app.post("/types_splitting")
async def types_splitting_endpoint(file: UploadFile = File(...)):
    """Report which columns of an uploaded CSV are numerical or categorical.

    The upload is parsed with ``pandas.read_csv`` and classified by
    ``types_splitting``; the two name lists are returned under
    ``numerical_columns`` and ``categorical_columns``.
    """
    frame = pd.read_csv(file.file)
    numerical_columns, categorical_columns = types_splitting(frame)
    return {
        "numerical_columns": numerical_columns,
        "categorical_columns": categorical_columns,
    }

In [11]:
# -----------------------------endpoint_histogram-------------------
# Reuse the application object if one already exists: creating a second
# FastAPI() here would discard every route registered on the first one.
if "app" not in globals():
    app = FastAPI()


@app.post("/create-histogram")
async def create_histogram_endpoint(data: str = Form(...), x: str = Form(...), dist: str = Form(None),
                                   color: str = Form(None), bins: int = Form(50), func: str = Form('count'),
                                   norm: str = Form('')):
    """Build a histogram from form fields and return it as Plotly JSON.

    Form fields:
        data: The dataset as a JSON string (form fields are always text, so
            it is decoded here before being handed to Plotly).
        x: Name of the column to plot.
        dist: Optional column used for colour grouping.
        color: Optional single CSS colour for the bars.
        bins, func, norm: Forwarded to ``create_histogram``.

    Returns:
        ``{"histogram": <figure JSON string>}``.
    """
    records = json.loads(data)
    # Plotly expects a sequence of colours; a bare string would be read
    # character by character.
    palette = [color] if color else None
    fig = create_histogram(records, x, dist, palette, bins, func, norm)
    return {"histogram": to_json(fig)}
# ----------------------------endpoint_linechart-----------------------
@app.post("/create-linechart")
async def create_linechart_endpoint(data: str = Form(...), x: str = Form(...), y: str = Form(...),
                                    dist: str = Form(None), shape: str = Form('linear'), color: str = Form(None)):
    """Build a line chart from form fields and return it as Plotly JSON.

    Form fields:
        data: The dataset as a JSON string (form fields are always text, so
            it is decoded here before being handed to Plotly).
        x, y: Names of the columns to plot.
        dist: Optional column used for colour grouping.
        shape: Line shape forwarded to ``create_linechart``.
        color: Optional single CSS colour for the lines.

    Returns:
        ``{"linechart": <figure JSON string>}``.
    """
    records = json.loads(data)
    # Plotly expects a sequence of colours; a bare string would be read
    # character by character.
    palette = [color] if color else None
    fig = create_linechart(records, x, y, dist, shape, palette)
    return {"linechart": to_json(fig)}
# ----------------------------endpoint_barchart-----------------------
@app.post("/create-barchart")
async def create_barchart_endpoint(data: str = Form(...), x: str = Form(...), y: str = Form(...),
                                   dist: str = Form(None), mode: str = Form(None), color: str = Form(None)):
    """Build a bar chart from form fields and return it as Plotly JSON.

    Form fields:
        data: The dataset as a JSON string (form fields are always text, so
            it is decoded here before being handed to Plotly).
        x, y: Names of the columns to plot.
        dist: Optional column used for colour grouping.
        mode: Optional bar mode forwarded to ``create_barchart``.
        color: Optional single CSS colour for the bars.

    Returns:
        ``{"barchart": <figure JSON string>}``.
    """
    records = json.loads(data)
    # Plotly expects a sequence of colours; a bare string would be read
    # character by character.
    palette = [color] if color else None
    fig = create_barchart(records, x, y, dist, mode, palette)
    return {"barchart": to_json(fig)}
# ----------------------------endpoint_boxplot-------------------------
@app.post("/create-boxplot")
async def create_boxplot_endpoint(data: str = Form(...), y: str = Form(...), dist: str = Form(None),
                                  points: str = Form(None), color: str = Form(None)):
    """Build a box plot from form fields and return it as Plotly JSON.

    Form fields:
        data: The dataset as a JSON string (form fields are always text, so
            it is decoded here before being handed to Plotly).
        y: Name of the column to plot.
        dist: Optional column used to group the boxes.
        points: Optional point-display option forwarded to ``create_boxplot``.
        color: Optional single CSS colour for the boxes.

    Returns:
        ``{"boxplot": <figure JSON string>}``.
    """
    records = json.loads(data)
    # Plotly expects a sequence of colours; a bare string would be read
    # character by character.
    palette = [color] if color else None
    fig = create_boxplot(records, y, dist, points, palette)
    return {"boxplot": to_json(fig)}
# ----------------------------endpoint_piechart-------------------------
@app.post("/create-pie-chart")
async def create_pie_chart_endpoint(data: str = Form(...), values: str = Form(...), names: str = Form(...),
                                    color: str = Form(None)):
    """Build a pie chart from form fields and return it as Plotly JSON.

    Form fields:
        data: The dataset as a JSON string (form fields are always text, so
            it is decoded here before being handed to Plotly).
        values: Name of the column holding the slice sizes.
        names: Name of the column holding the slice labels.
        color: Optional single CSS colour for the slices.

    Returns:
        ``{"piechart": <figure JSON string>}``.
    """
    records = json.loads(data)
    # Plotly expects a sequence of colours; a bare string would be read
    # character by character.
    palette = [color] if color else None
    fig = create_pie_chart(records, values, names, palette)
    return {"piechart": to_json(fig)}
# ----------------------------endpoint_scatterplot-------------------------
@app.post("/create-scatter-plot")
async def create_scatter_plot_endpoint(data: str = Form(...), x: str = Form(...), y: str = Form(...),
                                       dist: str = Form(None), size: str = Form(None),
                                       symbol: str = Form(None), color: str = Form(None)):
    """Build a scatter plot from form fields and return it as Plotly JSON.

    Form fields:
        data: The dataset as a JSON string (form fields are always text, so
            it is decoded here before being handed to Plotly).
        x, y: Names of the columns to plot.
        dist: Optional column used for colour grouping.
        size, symbol: Optional column names forwarded to
            ``create_scatter_plot``.
        color: Optional single CSS colour for the markers.

    Returns:
        ``{"scatterplot": <figure JSON string>}``.
    """
    records = json.loads(data)
    # Plotly expects a sequence of colours; a bare string would be read
    # character by character.
    palette = [color] if color else None
    fig = create_scatter_plot(records, x, y, dist, size, symbol, palette)
    return {"scatterplot": to_json(fig)}
# ---------------------------update_label-----------------------------------
@app.post("/update-labels")
async def update_labels_endpoint(fig_json: str = Form(...), x_label: str = Form(...), y_label: str = Form(...)):
    """Replace the axis titles of a serialized figure.

    Form fields:
        fig_json: A figure serialized as Plotly JSON.
        x_label: New x-axis title.
        y_label: New y-axis title.

    Returns:
        ``{"plot": <updated figure JSON string>}``.
    """
    import plotly.io as pio

    # json.loads would only give a plain dict, which has no update_layout();
    # rebuild a real Figure object from the JSON instead.
    fig = pio.from_json(fig_json)
    updated_fig = update_labels(fig, x_label, y_label)
    return {"plot": to_json(updated_fig)}
# ---------------------------update_title-----------------------------------
@app.post("/update-title")
async def update_title_endpoint(fig_json: str = Form(...), title: str = Form(...)):
    """Replace the title of a serialized figure.

    Form fields:
        fig_json: A figure serialized as Plotly JSON.
        title: New figure title.

    Returns:
        ``{"plot": <updated figure JSON string>}``.
    """
    import plotly.io as pio

    # json.loads would only give a plain dict, which has no update_layout();
    # rebuild a real Figure object from the JSON instead.
    fig = pio.from_json(fig_json)
    updated_fig = update_title(fig, title)
    return {"plot": to_json(updated_fig)}