# CS624 Final Project

## Read the data from  Parquet Files

* SMA (Simple Moving Average) is an average of data points over a specific period, emphasizing all points equally. 
* EMA (Exponential Moving Average) gives more weight to recent data points, reacting faster to price changes.
* Variance (Var) measures the average of the squared differences from the mean, indicating the spread of a dataset.
* Standard Deviation (Std) is the square root of variance, representing the average distance of each data point from the mean.
* Smoothing Period (7,30,90) days

In [0]:
%%capture
!pip install pandas_ta --quiet

In [0]:
#Pyspark
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType,DoubleType,DateType,TimestampType,LongType
from pyspark.sql.functions import mean, min, max, stddev, regexp_replace,trim
from pyspark.sql import functions as F
from pyspark.sql.functions import col, isnan, when, trim
from pyspark.sql.types import *
from pyspark.sql.functions import *
from pyspark.sql.functions import col, avg
from pyspark.sql.window import Window
from pyspark import SparkConf
import numpy as np

import matplotlib.pyplot as plt

In [0]:
spark

In [0]:
class DataParquet:
    def __init__(self, app_name="DataParquet"):
        """
        Initializes a Spark session.

        Args:
        - app_name: Name of the Spark application.
        """
        conf = SparkConf()
        conf.set("spark.executor.memory", "20g")
        conf.set("spark.driver.memory", "10g")
        conf.set("spark.executor.cores", "4")
        conf.set("spark.default.parallelism", "200")
        conf.set("spark.sql.shuffle.partitions", "200")
        conf.set("spark.executor.extraJavaOptions", "-XX:+UseG4GC")
        #conf.set("spark.dynamicAllocation.enabled", "true")
        conf.set("spark.shuffle.service.enabled", "true")
        conf.set("spark.memory.fraction", "0.7")
        conf.set("spark.memory.storageFraction", "0.5")
        conf.set('spark.sql.execution.arrow.enabled', 'true')
        self.spark = SparkSession.builder \
            .appName(app_name) \
            .config(conf=conf) \
            .getOrCreate()


    def read_parquet(self, file_path, header=True, infer_schema=True):
        """
        Reads a Parquet file into a DataFrame.

        Args:
        - file_path: Path to the Parquet file.
        - header: Whether the Parquet file has a header row.
        - infer_schema: Whether to infer the data types of columns.

        Returns:
        - DataFrame: The DataFrame containing the Parquet data.
        """
       
        return self.spark.read.parquet(file_path, header=header, inferschema=True)
    
    def ChangeDataType(self,clean_data):
        schema = StructType([
        StructField("Timestamp", LongType(), True),
        StructField("Open", DoubleType(), True),
        StructField("High", DoubleType(), True),
        StructField("Low", DoubleType(), True),
        StructField("Close", DoubleType(), True),
        StructField("VolBTC", DoubleType(), True),
        StructField("VolCurrency", DoubleType(), True),
        StructField("Weighted_Price", DoubleType(), True),
        StructField("Timestamp_new", TimestampType(), True),
        StructField("date", DateType(), True),
        StructField("time", StringType(), True),
        StructField("hour",  IntegerType(), True),
        StructField("day_of_week", IntegerType(), True),
        StructField("year", IntegerType(), True),
        ])

        new_data = clean_data.select(
        [clean_data[col_name].cast(schema[col_name].dataType).alias(col_name) for col_name in clean_data.columns])
        return new_data


    def stop(self):
        """
        Stops the Spark session.
        """
        self.spark.stop()

In [0]:
dp = DataParquet(app_name="DataParquet")
URL="/FileStore/tables/parquet_data/splitDFWithYear.parquet"
splitDFWithYear = dp.read_parquet(URL)
splitDFWithYear=dp.ChangeDataType(splitDFWithYear)

In [0]:
splitDFWithYear_pandas=splitDFWithYear.toPandas()
splitDFWithYear_pandas.head()

Unnamed: 0,Timestamp,Open,High,Low,Close,VolBTC,VolCurrency,Weighted_Price,Timestamp_new,date,time,hour,day_of_week,year
0,1496431020,2425.01,2427.04,2425.01,2426.83,3.273191,7943.286051,2426.770974,2017-06-02,2017-06-02,19:17:00,19,6,2017
1,1496431920,2422.97,2425.11,2422.97,2423.0,2.619033,6346.482316,2423.215865,2017-06-02,2017-06-02,19:32:00,19,6,2017
2,1496497680,2520.99,2520.99,2520.99,2520.99,0.010368,26.137624,2520.99,2017-06-03,2017-06-03,13:48:00,13,7,2017
3,1496504280,2548.0,2550.0,2547.01,2550.0,8.082427,20600.322052,2548.779285,2017-06-03,2017-06-03,15:38:00,15,7,2017
4,1496532900,2557.26,2557.26,2557.26,2557.26,0.035473,90.713505,2557.26,2017-06-03,2017-06-03,23:35:00,23,7,2017


# Reduce Pandas Memory

In [0]:
##https://www.kaggle.com/code/rinnqd/reduce-memory-usage
def reduce_mem_usage(df, verbose=True):
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    start_mem = df.memory_usage().sum() / 1024**2
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type in numerics:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)

    end_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))

    return df

In [0]:
splitDFWithYear_pandas = reduce_mem_usage(splitDFWithYear_pandas)

Memory usage after optimization is: 165.43 MB
Decreased by 52.0%


# Feature Engineering

In [0]:
# Data smoothing is done by using an algorithm to remove noise from a data set. This allows important patterns to more clearly stand out.
class DataSmoothing:
    def MovingAverages(self,data_df,smoothening_type):
        smoothening_range = [7, 30, 90]
        #column_name=['Open','High','Low','Close','Weighted_Price']
        column_name=['Open','High','Low','Weighted_Price']
        if smoothening_type == 'sma':
            # Loop through each column (excluding non-numeric columns)
            for column in data_df[column_name]:
                # Calculate SMA using pandas_ta for each window length in the range
                for window_length in smoothening_range:
                    data_df[f'{column}_sma_{window_length}'] = ta.sma(data_df[column], length=window_length)

        if smoothening_type == 'ema':
            # Loop through each column (excluding non-numeric columns)
            for column in data_df[column_name]:
                # Calculate SMA using pandas_ta for each window length in the range
                for window_length in smoothening_range:
                    data_df[f'{column}_ema_{window_length}'] = ta.ema(data_df[column], length=window_length)

        if smoothening_type == 'rsi':
            # Loop through each column (excluding non-numeric columns)
            for column in data_df[column_name]:
                # Calculate SMA using pandas_ta for each window length in the range
                for window_length in smoothening_range:
                    data_df[f'{column}_rsi_{window_length}'] = ta.rsi(data_df[column], length=window_length)

        if smoothening_type == 'var':
            # Loop through each column (excluding non-numeric columns)
            for column in data_df[column_name]:
                # Calculate SMA using pandas_ta for each window length in the range
                for window_length in smoothening_range:
                    data_df[f'{column}_var_{window_length}'] = ta.variance(data_df[column], length=window_length)

        if smoothening_type == 'std':
            # Loop through each column (excluding non-numeric columns)
            for column in data_df[column_name]:
                # Calculate SMA using pandas_ta for each window length in the range
                for window_length in smoothening_range:
                    data_df[f'{column}_std_{window_length}'] = ta.stdev(data_df[column], length=window_length)

        if smoothening_type == 'roc':
            # Loop through each column (excluding non-numeric columns)
            for column in data_df[column_name]:
                # Calculate SMA using pandas_ta for each window length in the range
                for window_length in smoothening_range:
                    data_df[f'{column}_roc_{window_length}'] = ta.roc(data_df[column], length=window_length)
        
        if smoothening_type == 'dema':
            # Loop through each column (excluding non-numeric columns)
            for column in data_df[column_name]:
                # Calculate SMA using pandas_ta for each window length in the range
                for window_length in smoothening_range:
                    data_df[f'{column}_dema_{window_length}'] = ta.dema(data_df[column], length=window_length)

        if smoothening_type == 'tema':
            # Loop through each column (excluding non-numeric columns)
            for column in data_df[column_name]:
                # Calculate SMA using pandas_ta for each window length in the range
                for window_length in smoothening_range:
                    data_df[f'{column}_tema_{window_length}'] = ta.tema(data_df[column], length=window_length)
        
        return data_df

da = DataSmoothing()

In [0]:
import pandas_ta as ta
data_df_sma=da.MovingAverages(splitDFWithYear_pandas,'sma')
data_df_sma.head()

Unnamed: 0,Timestamp,Open,High,Low,Close,VolBTC,VolCurrency,Weighted_Price,Timestamp_new,date,time,hour,day_of_week,year,Open_sma_7,Open_sma_30,Open_sma_90,High_sma_7,High_sma_30,High_sma_90,Low_sma_7,Low_sma_30,Low_sma_90,Weighted_Price_sma_7,Weighted_Price_sma_30,Weighted_Price_sma_90
0,1496431020,2426.0,2428.0,2426.0,2426.0,3.273438,7943.286133,2426.0,2017-06-02,2017-06-02,19:17:00,19,6,2017,,,,,,,,,,,,
1,1496431920,2422.0,2426.0,2422.0,2424.0,2.619141,6346.482422,2424.0,2017-06-02,2017-06-02,19:32:00,19,6,2017,,,,,,,,,,,,
2,1496497680,2520.0,2520.0,2520.0,2520.0,0.010368,26.137625,2520.0,2017-06-03,2017-06-03,13:48:00,13,7,2017,,,,,,,,,,,,
3,1496504280,2548.0,2550.0,2548.0,2550.0,8.085938,20600.322266,2548.0,2017-06-03,2017-06-03,15:38:00,15,7,2017,,,,,,,,,,,,
4,1496532900,2558.0,2558.0,2558.0,2558.0,0.035461,90.713509,2558.0,2017-06-03,2017-06-03,23:35:00,23,7,2017,,,,,,,,,,,,


In [0]:
data_df_ema=da.MovingAverages(splitDFWithYear_pandas,'ema')
data_df_ema.head()

Unnamed: 0,Timestamp,Open,High,Low,Close,VolBTC,VolCurrency,Weighted_Price,Timestamp_new,date,time,hour,day_of_week,year,Open_sma_7,Open_sma_30,Open_sma_90,High_sma_7,High_sma_30,High_sma_90,Low_sma_7,Low_sma_30,Low_sma_90,Weighted_Price_sma_7,Weighted_Price_sma_30,Weighted_Price_sma_90,Open_ema_7,Open_ema_30,Open_ema_90,High_ema_7,High_ema_30,High_ema_90,Low_ema_7,Low_ema_30,Low_ema_90,Weighted_Price_ema_7,Weighted_Price_ema_30,Weighted_Price_ema_90
0,1496431020,2426.0,2428.0,2426.0,2426.0,3.273438,7943.286133,2426.0,2017-06-02,2017-06-02,19:17:00,19,6,2017,,,,,,,,,,,,,,,,,,,,,,,,
1,1496431920,2422.0,2426.0,2422.0,2424.0,2.619141,6346.482422,2424.0,2017-06-02,2017-06-02,19:32:00,19,6,2017,,,,,,,,,,,,,,,,,,,,,,,,
2,1496497680,2520.0,2520.0,2520.0,2520.0,0.010368,26.137625,2520.0,2017-06-03,2017-06-03,13:48:00,13,7,2017,,,,,,,,,,,,,,,,,,,,,,,,
3,1496504280,2548.0,2550.0,2548.0,2550.0,8.085938,20600.322266,2548.0,2017-06-03,2017-06-03,15:38:00,15,7,2017,,,,,,,,,,,,,,,,,,,,,,,,
4,1496532900,2558.0,2558.0,2558.0,2558.0,0.035461,90.713509,2558.0,2017-06-03,2017-06-03,23:35:00,23,7,2017,,,,,,,,,,,,,,,,,,,,,,,,


In [0]:
data_df_var=da.MovingAverages(splitDFWithYear_pandas,'var')
data_df_var.head()

Unnamed: 0,Timestamp,Open,High,Low,Close,VolBTC,VolCurrency,Weighted_Price,Timestamp_new,date,time,hour,day_of_week,year,Open_sma_7,Open_sma_30,Open_sma_90,High_sma_7,High_sma_30,High_sma_90,Low_sma_7,Low_sma_30,Low_sma_90,Weighted_Price_sma_7,Weighted_Price_sma_30,Weighted_Price_sma_90,Open_ema_7,Open_ema_30,Open_ema_90,High_ema_7,High_ema_30,High_ema_90,Low_ema_7,Low_ema_30,Low_ema_90,Weighted_Price_ema_7,Weighted_Price_ema_30,Weighted_Price_ema_90,Open_var_7,Open_var_30,Open_var_90,High_var_7,High_var_30,High_var_90,Low_var_7,Low_var_30,Low_var_90,Weighted_Price_var_7,Weighted_Price_var_30,Weighted_Price_var_90
0,1496431020,2426.0,2428.0,2426.0,2426.0,3.273438,7943.286133,2426.0,2017-06-02,2017-06-02,19:17:00,19,6,2017,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,1496431920,2422.0,2426.0,2422.0,2424.0,2.619141,6346.482422,2424.0,2017-06-02,2017-06-02,19:32:00,19,6,2017,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,1496497680,2520.0,2520.0,2520.0,2520.0,0.010368,26.137625,2520.0,2017-06-03,2017-06-03,13:48:00,13,7,2017,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,1496504280,2548.0,2550.0,2548.0,2550.0,8.085938,20600.322266,2548.0,2017-06-03,2017-06-03,15:38:00,15,7,2017,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,1496532900,2558.0,2558.0,2558.0,2558.0,0.035461,90.713509,2558.0,2017-06-03,2017-06-03,23:35:00,23,7,2017,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [0]:
spark.catalog.clearCache()
data_df_std=da.MovingAverages(splitDFWithYear_pandas,'std')
data_df_std.head()

Unnamed: 0,Timestamp,Open,High,Low,Close,VolBTC,VolCurrency,Weighted_Price,Timestamp_new,date,time,hour,day_of_week,year,Open_sma_7,Open_sma_30,Open_sma_90,High_sma_7,High_sma_30,High_sma_90,Low_sma_7,Low_sma_30,Low_sma_90,Weighted_Price_sma_7,Weighted_Price_sma_30,Weighted_Price_sma_90,Open_ema_7,Open_ema_30,Open_ema_90,High_ema_7,High_ema_30,High_ema_90,Low_ema_7,Low_ema_30,Low_ema_90,Weighted_Price_ema_7,Weighted_Price_ema_30,Weighted_Price_ema_90,Open_var_7,Open_var_30,Open_var_90,High_var_7,High_var_30,High_var_90,Low_var_7,Low_var_30,Low_var_90,Weighted_Price_var_7,Weighted_Price_var_30,Weighted_Price_var_90,Open_std_7,Open_std_30,Open_std_90,High_std_7,High_std_30,High_std_90,Low_std_7,Low_std_30,Low_std_90,Weighted_Price_std_7,Weighted_Price_std_30,Weighted_Price_std_90
0,1496431020,2426.0,2428.0,2426.0,2426.0,3.273438,7943.286133,2426.0,2017-06-02,2017-06-02,19:17:00,19,6,2017,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,1496431920,2422.0,2426.0,2422.0,2424.0,2.619141,6346.482422,2424.0,2017-06-02,2017-06-02,19:32:00,19,6,2017,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,1496497680,2520.0,2520.0,2520.0,2520.0,0.010368,26.137625,2520.0,2017-06-03,2017-06-03,13:48:00,13,7,2017,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,1496504280,2548.0,2550.0,2548.0,2550.0,8.085938,20600.322266,2548.0,2017-06-03,2017-06-03,15:38:00,15,7,2017,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,1496532900,2558.0,2558.0,2558.0,2558.0,0.035461,90.713509,2558.0,2017-06-03,2017-06-03,23:35:00,23,7,2017,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [0]:
#spark.catalog.clearCache()
#data_df_rsi=da.MovingAverages(splitDFWithYear_pandas,'rsi')
#data_df_rsi.head()

In [0]:
#data_df_rsi=da.MovingAverages(splitDFWithYear_pandas,'roc')
#data_df_rsi.head()

In [0]:
#data_df_rsi=da.MovingAverages(splitDFWithYear_pandas,'dema')
#data_df_rsi.head()

In [0]:
#data_df_rsi=da.MovingAverages(splitDFWithYear_pandas,'tema')
#data_df_rsi.head()

In [0]:
spark.catalog.clearCache()
data_df_std.head()

Unnamed: 0,Timestamp,Open,High,Low,Close,VolBTC,VolCurrency,Weighted_Price,Timestamp_new,date,time,hour,day_of_week,year,Open_sma_7,Open_sma_30,Open_sma_90,High_sma_7,High_sma_30,High_sma_90,Low_sma_7,Low_sma_30,Low_sma_90,Weighted_Price_sma_7,Weighted_Price_sma_30,Weighted_Price_sma_90,Open_ema_7,Open_ema_30,Open_ema_90,High_ema_7,High_ema_30,High_ema_90,Low_ema_7,Low_ema_30,Low_ema_90,Weighted_Price_ema_7,Weighted_Price_ema_30,Weighted_Price_ema_90,Open_var_7,Open_var_30,Open_var_90,High_var_7,High_var_30,High_var_90,Low_var_7,Low_var_30,Low_var_90,Weighted_Price_var_7,Weighted_Price_var_30,Weighted_Price_var_90,Open_std_7,Open_std_30,Open_std_90,High_std_7,High_std_30,High_std_90,Low_std_7,Low_std_30,Low_std_90,Weighted_Price_std_7,Weighted_Price_std_30,Weighted_Price_std_90
0,1496431020,2426.0,2428.0,2426.0,2426.0,3.273438,7943.286133,2426.0,2017-06-02,2017-06-02,19:17:00,19,6,2017,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,1496431920,2422.0,2426.0,2422.0,2424.0,2.619141,6346.482422,2424.0,2017-06-02,2017-06-02,19:32:00,19,6,2017,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,1496497680,2520.0,2520.0,2520.0,2520.0,0.010368,26.137625,2520.0,2017-06-03,2017-06-03,13:48:00,13,7,2017,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,1496504280,2548.0,2550.0,2548.0,2550.0,8.085938,20600.322266,2548.0,2017-06-03,2017-06-03,15:38:00,15,7,2017,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,1496532900,2558.0,2558.0,2558.0,2558.0,0.035461,90.713509,2558.0,2017-06-03,2017-06-03,23:35:00,23,7,2017,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [0]:
columns_to_convert = ['Open', 'High', 'Low', 'Close', 'VolBTC', 'VolCurrency', 'Weighted_Price']
data_df_std[columns_to_convert] = data_df_std[columns_to_convert].astype('float64')

In [0]:
spark_df = spark.createDataFrame(data_df_std)


In [0]:
spark_df.write.mode("overwrite").parquet("/FileStore/tables/parquet_data/data_smoothening_spark.parquet", compression="snappy") 

In [0]:
%fs ls /FileStore/tables/parquet_data/data_smoothening_spark.parquet

path,name,size,modificationTime
dbfs:/FileStore/tables/parquet_data/data_smoothening_spark.parquet/_committed_3712456460485398420,_committed_3712456460485398420,72675,1713598203000
dbfs:/FileStore/tables/parquet_data/data_smoothening_spark.parquet/_committed_4046819016022805013,_committed_4046819016022805013,72583,1713676255000
dbfs:/FileStore/tables/parquet_data/data_smoothening_spark.parquet/_started_4046819016022805013,_started_4046819016022805013,0,1713675978000
dbfs:/FileStore/tables/parquet_data/data_smoothening_spark.parquet/part-00000-tid-4046819016022805013-9541258c-d2de-45a2-9aac-43e2b84596c1-9-1-c000.snappy.parquet,part-00000-tid-4046819016022805013-9541258c-d2de-45a2-9aac-43e2b84596c1-9-1-c000.snappy.parquet,3741739,1713675991000
dbfs:/FileStore/tables/parquet_data/data_smoothening_spark.parquet/part-00001-tid-4046819016022805013-9541258c-d2de-45a2-9aac-43e2b84596c1-10-1-c000.snappy.parquet,part-00001-tid-4046819016022805013-9541258c-d2de-45a2-9aac-43e2b84596c1-10-1-c000.snappy.parquet,3794589,1713675992000
dbfs:/FileStore/tables/parquet_data/data_smoothening_spark.parquet/part-00002-tid-4046819016022805013-9541258c-d2de-45a2-9aac-43e2b84596c1-11-1-c000.snappy.parquet,part-00002-tid-4046819016022805013-9541258c-d2de-45a2-9aac-43e2b84596c1-11-1-c000.snappy.parquet,3785922,1713675992000
dbfs:/FileStore/tables/parquet_data/data_smoothening_spark.parquet/part-00003-tid-4046819016022805013-9541258c-d2de-45a2-9aac-43e2b84596c1-12-1-c000.snappy.parquet,part-00003-tid-4046819016022805013-9541258c-d2de-45a2-9aac-43e2b84596c1-12-1-c000.snappy.parquet,3795843,1713675991000
dbfs:/FileStore/tables/parquet_data/data_smoothening_spark.parquet/part-00004-tid-4046819016022805013-9541258c-d2de-45a2-9aac-43e2b84596c1-13-1-c000.snappy.parquet,part-00004-tid-4046819016022805013-9541258c-d2de-45a2-9aac-43e2b84596c1-13-1-c000.snappy.parquet,3789803,1713675992000
dbfs:/FileStore/tables/parquet_data/data_smoothening_spark.parquet/part-00005-tid-4046819016022805013-9541258c-d2de-45a2-9aac-43e2b84596c1-14-1-c000.snappy.parquet,part-00005-tid-4046819016022805013-9541258c-d2de-45a2-9aac-43e2b84596c1-14-1-c000.snappy.parquet,3792796,1713675992000
dbfs:/FileStore/tables/parquet_data/data_smoothening_spark.parquet/part-00006-tid-4046819016022805013-9541258c-d2de-45a2-9aac-43e2b84596c1-15-1-c000.snappy.parquet,part-00006-tid-4046819016022805013-9541258c-d2de-45a2-9aac-43e2b84596c1-15-1-c000.snappy.parquet,3779111,1713675992000
