# storage

> Classes that facilitate storage of datasets generated by other modules in the package

In [None]:
#| default_exp storage

In [None]:
#| export
from abc import ABC, abstractmethod
from pathlib import Path
import time, os, pickle

import pandas as pd
import numpy as np

In [None]:
#| export 
class BaseStorage(ABC):
    """Interface that every dataset storage backend has to implement.

    A concrete subclass must override all five methods below before it can be
    instantiated. Every operation is addressed by a `dataset_name`.
    """

    @abstractmethod
    def save(self, data, dataset_name):
        """Persist `data` under `dataset_name`."""

    @abstractmethod
    def load(self, dataset_name):
        """Return the data previously stored under `dataset_name`."""

    @abstractmethod
    def delete(self, dataset_name):
        """Remove the data stored under `dataset_name`."""

    @abstractmethod
    def exists(self, dataset_name):
        """Report whether something is stored under `dataset_name`."""

    @abstractmethod
    def is_stale(self, dataset_name):
        """Report whether the data under `dataset_name` is stale.

        What counts as stale is left to the concrete subclass.
        """

In [None]:
#| export
class LocalStorage(BaseStorage):
    """Store and load datasets as pickle files inside a local directory.

    Each dataset is kept in a single file, named after the dataset, directly
    under `data_dir_path`.

    NOTE: datasets are (de)serialised with `pickle`. Unpickling can execute
    arbitrary code, so only use this class on directories whose contents you
    trust.
    """

    # Default age, in seconds, after which a dataset counts as stale
    # (one 365-day year).
    ONE_YEAR_SECONDS = 365 * 24 * 60 * 60

    def __init__(self, data_dir_path: Path):
        """Bind the storage to an existing directory.

        `data_dir_path` is normally a `Path`; a `str` (or other path-like
        value) is also accepted and converted with `Path()`.

        Raises `ValueError` if the path does not point to an existing
        directory.
        """
        dir_path = Path(data_dir_path)
        # `is_dir()` is already False for a path that does not exist.
        if not dir_path.is_dir():
            raise ValueError(f"{data_dir_path} is not a path to an existing local directory")
        self.data_dir_path = dir_path

    def save(self, data, dataset_name: str) -> None:
        """Pickle `data` into the file for `dataset_name`, replacing any previous version.

        The pickle is first written to a temporary file in the same directory
        and only moved over the target once it is complete. A failure while
        pickling or writing therefore re-raises the error and leaves an
        already stored dataset untouched instead of truncating it.
        """
        target = self.filepath(dataset_name)
        # Hidden, per-process temp name next to the target so `os.replace`
        # stays on the same filesystem.
        tmp_path = target.with_name(f'.{target.name}.{os.getpid()}.tmp')
        try:
            with open(tmp_path, 'wb') as f:
                pickle.dump(data, f)
            os.replace(tmp_path, target)
        except BaseException:
            # Best-effort cleanup of the partial temp file; the original
            # error is what the caller needs to see.
            try:
                os.remove(tmp_path)
            except OSError:
                pass
            raise

    def load(self, dataset_name: str):
        """Unpickle and return the dataset stored under `dataset_name`.

        Raises `FileNotFoundError` if no such file exists.
        """
        # SECURITY: pickle.load runs arbitrary code embedded in the file;
        # only load files written by a trusted source.
        with open(self.filepath(dataset_name), 'rb') as f:
            return pickle.load(f)

    def delete(self, dataset_name: str) -> None:
        """Remove the file for `dataset_name`.

        Raises `FileNotFoundError` if the file does not exist.
        """
        os.remove(self.filepath(dataset_name))

    def exists(self, dataset_name: str) -> bool:
        """Return True if a regular file exists for `dataset_name`."""
        # `is_file()` is False for missing paths, so no separate exists() check.
        return self.filepath(dataset_name).is_file()

    def is_stale(self, dataset_name: str, max_age_seconds: float = ONE_YEAR_SECONDS) -> bool:
        """Return True if the dataset file was last modified more than `max_age_seconds` ago.

        `max_age_seconds` defaults to one 365-day year.

        Raises `FileNotFoundError` if the file does not exist.
        """
        age_seconds = time.time() - self.filepath(dataset_name).stat().st_mtime
        return age_seconds > max_age_seconds

    def filepath(self, dataset_name: str) -> Path:
        """Return the path of the file that holds `dataset_name`."""
        # The f-string turns a non-str name into text before joining.
        return self.data_dir_path / f'{dataset_name}'

In [None]:
#| eval: false
# Bind the storage to the repo's data folder; the folder must already exist,
# otherwise LocalStorage raises ValueError.
local_storage = LocalStorage(Path('../data'))

In [None]:
#| eval: false
# Check for 'test.pkl' before anything has been saved in this walkthrough.
local_storage.exists('test.pkl')

False

In [None]:
#| eval: false
# Build a small 3x3 DataFrame of random values to use as sample data.
df_test = pd.DataFrame(np.random.rand(3,3))
df_test

Unnamed: 0,0,1,2
0,0.527991,0.440262,0.674734
1,0.011943,0.914838,0.430143
2,0.249933,0.384021,0.958614


In [None]:
#| eval: false
# Pickle the sample DataFrame into the data folder as 'test.pkl'.
local_storage.save(df_test, 'test.pkl')

In [None]:
#| eval: false
# After saving, the dataset should be reported as present.
local_storage.exists('test.pkl')

True

In [None]:
#| eval: false
# Read the dataset back; it should match the df_test that was saved.
df = local_storage.load('test.pkl')
df

Unnamed: 0,0,1,2
0,0.527991,0.440262,0.674734
1,0.011943,0.914838,0.430143
2,0.249933,0.384021,0.958614


In [None]:
#| eval: false
# A file written moments ago is far below the one-year staleness threshold.
local_storage.is_stale('test.pkl')

False

In [None]:
#| eval: false
# Remove the file, then confirm it is no longer reported as present.
local_storage.delete('test.pkl')
local_storage.exists('test.pkl')

False

In [None]:
#| hide 
#| eval: false
import os, glob
# Reset the demo folder: delete every entry matched by '*' (glob skips
# dot-files), then write an empty .gitkeep placeholder — presumably so the
# otherwise empty folder stays tracked by git.
for leftover in glob.glob('../data/*'):
    os.remove(leftover)
with open('../data/.gitkeep', 'w'):
    pass

In [None]:
#| hide
# Export the notebooks' `#| export` cells to the package's Python modules.
import nbdev
nbdev.nbdev_export()