In [None]:
import pandas as pd
import numpy as np 
import os
from dateutil.parser import parse

In [None]:
# Data folders are resolved relative to the current working directory.
path = os.path.join(os.getcwd(), "Data")
stockspath = os.path.join(path, "Stock")
# print(stockspath)
# print(path)

In [None]:
# def is_date(date):
#     try:
#         parse(date,fuzzy=False)
#         return True
#     except Exception as e:
#         return False

In [None]:
def clean_index(ind):
    """
    Parses the Date column and drops the duplicate rows based on it.

    The dataframe passed in is left untouched; the conversion and the
    de-duplication are applied to a new dataframe.

    Parameters
    ----------

    ind : dataframe
        index dataframe; must contain a "Date" column that
        pd.to_datetime can parse.

    Returns
    -------

    ind : dataframe
        new dataframe with "Date" converted to datetime, keeping only the
        first row for each date.

    """
    # assign() builds a new frame, so the caller's dataframe is not modified
    # and no SettingWithCopyWarning is raised when a slice is passed in.
    parsed = ind.assign(Date=pd.to_datetime(ind["Date"]))
    return parsed.drop_duplicates(subset=["Date"], keep="first")

In [None]:
def data_cleaning(df,ind):
    """
    Removes duplicate rows, adds missing rows, and fills null values from the
    previous row of the stock dataframe.

    Steps, in order: de-duplicate the index dataframe by date, parse and
    de-duplicate the stock dates, append a row for every index date the
    stock is missing, then forward-fill the null values.

    NOTE(review): add_missing_rows appends the new rows after the existing
    ones, so the forward fill copies values from whatever row ends up
    directly above them, not necessarily the chronologically previous
    trading day. Confirm this is the intended behaviour.

    Parameters
    ----------

    df : dataframe
        stock dataframe; must contain a "Date" column.

    ind : dataframe
        index dataframe; must contain a "Date" column.

    Returns
    -------

    df : dataframe
        updated dataframe after performing all the operations.

    """
    ind = clean_index(ind)
    # assign() returns a new frame, so the caller's stock dataframe keeps its
    # original "Date" column instead of being converted in place.
    df = df.assign(Date=pd.to_datetime(df["Date"]))
    df = df.drop_duplicates(subset=["Date"], keep="first")
    df = add_missing_rows(df, ind)
    return fill_with_previous_values(df)

In [1]:
def add_missing_rows(df,ind):
    """

    Adds rows to the stock dataframe.

    If a date is present in the index dataframe and not present in the stock
    dataframe, and lies between the earliest and latest stock dates, a new
    row is appended to the stock dataframe. The values of the index row are
    copied by position into the leading columns of the new row and every
    remaining column is NaN.

    The new rows are appended after the existing rows (they are not sorted
    into date order) and carry a fresh 0-based index, so index labels may
    repeat in the result. Neither input dataframe is modified.

    NOTE(review): when `ind` has more columns than "Date", its other values
    land in the stock's leading columns. Confirm callers pass a date-only
    (or positionally compatible) index dataframe.

    Parameters
    ----------

    df : dataframe
        stock dataframe; must contain a "Date" column.

    ind : dataframe
        index dataframe; must contain a "Date" column.

    Returns
    -------

    df : dataframe
        updated dataframe after adding new rows, with "Date" as datetime.

    Raises
    ------

    ValueError
        if rows have to be added but the index dataframe has more columns
        than the stock dataframe.

    """

    # Parse on copies so the caller's dataframes are not changed.
    df = df.assign(Date=pd.to_datetime(df["Date"]))
    ind = ind.assign(Date=pd.to_datetime(ind["Date"]))

    # Use min/max rather than the first/last row so the date range is found
    # whatever order the stock rows are in.
    first_date = df["Date"].min()
    last_date = df["Date"].max()
    in_range = ind["Date"].between(first_date, last_date)
    already_present = ind["Date"].isin(df["Date"])
    missing = ind.loc[(in_range & ~already_present).to_numpy()].copy()

    # Nothing to add: skip the concat with an empty frame.
    if missing.empty:
        return df

    n_index_cols = missing.shape[1]
    if n_index_cols > df.shape[1]:
        raise ValueError(
            "index dataframe has more columns than the stock dataframe"
        )

    # Map the index columns onto the stock's leading columns by position,
    # then pad every remaining stock column with NaN.
    missing.columns = df.columns[:n_index_cols]
    missing = missing.reindex(columns=df.columns)
    missing.index = range(len(missing))

    return pd.concat([df, missing])

In [None]:
def fill_with_previous_values(df):
    """
    Fills the null values in the dataframe with the values from the previous row.

    The dataframe is modified in place and the same object is returned.
    Null values in the first row stay null because there is no previous row
    to copy from.

    Parameters
    ----------

    df : dataframe
        stock dataframe

    Returns
    -------

    df : dataframe
        the same dataframe after filling with previous values.

    """

    # DataFrame.ffill replaces fillna(method="ffill"), whose `method`
    # argument is deprecated in recent pandas releases.
    df.ffill(inplace=True)
    return df