# Pandas read_csv exploration

In [1]:
from typing import Optional, Union

In [2]:
import datasense as ds
import pandas as pd

In [3]:
def main():
    """
    Run the exploration end to end.

    Builds the sample DataFrame, prints its first rows, saves it to CSV,
    reads 'myfile.csv' back with column 't' as the index, and prints the
    first rows and the dtypes of the reloaded data.
    """
    pd.options.display.max_columns = 500
    original = create_dataframe()
    print(original.head())
    save_dataframe(df=original)
    # Reload the file so the frame produced by read_csv can be inspected.
    reloaded = read_file(file_name='myfile.csv', index_col='t')
    print(reloaded.head())
    print(reloaded.dtypes)

In [4]:
def create_dataframe() -> pd.DataFrame:
    """
    Build a sample DataFrame from the datasense data generators.

    Returns
    -------
    pd.DataFrame
        A dataframe with columns 'b', 'c', 'd', 's', 't', 'x', 'y', 'z',
        each filled by the corresponding datasense call below.
    """
    # One entry per column; the key is the column label.
    columns = {
        'b': ds.random_data(distribution='bool'),
        'c': ds.random_data(distribution='categories'),
        'd': ds.timedelta_data(),
        's': ds.random_data(distribution='strings'),
        't': ds.datetime_data(),
        'x': ds.random_data(distribution='norm'),
        'y': ds.random_data(distribution='randint'),
        'z': ds.random_data(distribution='uniform'),
    }
    return pd.DataFrame(columns)

In [5]:
def save_dataframe(
    df: pd.DataFrame,
    *,
    file_name: str = 'myfile.csv'
) -> None:
    """
    Write a DataFrame to a CSV file without its index.

    Parameters
    ----------
    df : pd.DataFrame
        The dataframe to save.
    file_name : str, default 'myfile.csv'
        The path of the CSV file to write.

    Examples
    --------
    Example 1
    >>> save_dataframe(df=df)

    Example 2
    >>> save_dataframe(df=df, file_name='other.csv')
    """
    # index=False: the row index is not written as a column.
    df.to_csv(
        file_name,
        index=False
    )

In [6]:
def read_file(
    file_name: str,
    *,
    index_col: Optional[Union[str, bool]] = None
) -> pd.DataFrame:
    """
    Create a DataFrame from an external file.

    Parameters
    ----------
    file_name : str
        The name of the file to read.
    index_col : Optional[Union[str, bool]], default None
        Forwarded unchanged to the ``index_col`` argument of
        ``pd.read_csv``; for example, the label of the column to use
        as the index.

    Returns
    -------
    df : pd.DataFrame
        The dataframe created from the external file.

    Examples
    --------
    Example 1
    >>> data = read_file(file_name='myfile.csv')

    Example 2
    >>> data = read_file(file_name='myfile.csv', index_col='t')
    """
    # The docstring must be the first statement in the body; otherwise it
    # is a discarded string expression and read_file.__doc__ stays None.
    df = pd.read_csv(
        file_name,
        index_col=index_col
    )
    return df

In [7]:
# Call main() only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

       b       c      d       s                   t         x   y         z
0  False  medium 0 days  female 2020-10-07 12:15:55  0.922910  55  0.030097
1  False   large 0 days    male 2020-10-08 12:15:55  0.940365  19  0.492294
2   True   large 0 days    male 2020-10-09 12:15:55 -0.795164  27  0.948500
3  False   small 0 days    male 2020-10-10 12:15:55 -0.402795  57  0.882655
4  False   large 0 days    male 2020-10-11 12:15:55 -0.992234  38  0.122911
                         b       c       d       s         x   y         z
t                                                                         
2020-10-07 12:15:55  False  medium  0 days  female  0.922910  55  0.030097
2020-10-08 12:15:55  False   large  0 days    male  0.940365  19  0.492294
2020-10-09 12:15:55   True   large  0 days    male -0.795164  27  0.948500
2020-10-10 12:15:55  False   small  0 days    male -0.402795  57  0.882655
2020-10-11 12:15:55  False   large  0 days    male -0.992234  38  0.122911
b       bool
c     