In [2]:
import os
import numpy as np
import pandas as pd
from datetime import time

def decimal_time(t, direction='to_decimal'):
    """Convert between a clock time and its decimal-hour representation.

    Arguments:
        t : object exposing ``hour`` and ``minute`` (e.g. ``datetime.time``)
            if direction is 'to_decimal';
            number of hours as a float (e.g. 12.5) if direction is
            'to_hexadecimal'
        direction : 'to_decimal' (default) or 'to_hexadecimal'. The second
            name is kept for compatibility; it means "back to an
            hours:minutes clock time".
    Returns:
        float if direction is 'to_decimal' (seconds are ignored)
        datetime.time if direction is 'to_hexadecimal', rounded to the
        nearest minute
    Raises:
        ValueError: if direction is not recognised, or if the decimal value
            falls outside the range ``datetime.time`` accepts.
    """
    if direction == 'to_decimal':
        return t.hour + t.minute / 60

    if direction == 'to_hexadecimal':
        # Work in whole minutes: the fractional hour must be scaled by 60
        # *before* truncating, otherwise the minutes are always 0.
        hours, minutes = divmod(int(round(t * 60)), 60)
        if hours == 24 and t < 24:
            # A value just below midnight rounded up to 24:00, which
            # datetime.time cannot represent; keep it on the same day.
            hours, minutes = 23, 59
        return time(hours, minutes)

    raise ValueError("Invalid direction, choose 'to_decimal'or 'to_hexadecimal'")
    

class IceModel(object):
    """
    Simple model that fits a trend to historic data to extrapolate a future
    break up date.

    Only previous break up dates are considered. The day of the year and the
    time of day of the break up are modelled separately and combined at the end.

    Typical use:
        model = IceModel(df)
        model.fit_day_of_year = model.polyfit('year', 'day_of_year', degree=1)
        model.fit_time = model.polyfit('year', 'time', degree=1)
        model.get_prediction(2025)

    METHODS:
        polyfit: polynomial fit between two properties of the object
        predict: evaluate a stored fit at a new value
        get_predicted_day: predicted day of year from `fit_day_of_year`
        get_predicted_time: predicted time of day from `fit_time`
        get_prediction: predicted date and time combined
    """

    def __init__(self, df):
        """Initialize the object with a DataFrame of break up dates.

        Args:
            df (_pandas DataFrame_): must contain the columns 'Year', 'Month',
                'Day', 'Hour' and 'Minute' (format of `BreakUpTime.csv`).
                A copy is stored, the caller's frame is not modified.
        """
        self.df = df.copy()
        # No fit and no prediction exist until the user creates them.
        self._fit_time = None
        self._fit_day_of_year = None
        self._predicted_day_of_break_up = None
        self._predicted_time_of_break_up = None
        self._prediction = None

    # ------------------------------------------------------------------
    # Time of break up
    # ------------------------------------------------------------------
    @property
    def date_time(self):
        """Series of timestamps assembled from the date/time columns."""
        return pd.to_datetime(self.df[['Year', 'Month', 'Day', 'Hour', 'Minute']])

    @property
    def time(self):
        """Time-of-day component of every break up."""
        return self.date_time.dt.time

    @property
    def decimal_time(self):
        """Time of day expressed in decimal hours (e.g. 12:30 -> 12.5)."""
        return self.time.apply(lambda t: decimal_time(t, direction='to_decimal'))

    @property
    def fit_time(self):
        """Fit (as returned by `polyfit`) used for the time of day, or None."""
        return self._fit_time

    @fit_time.setter
    def fit_time(self, value):
        # Store on the private attribute; assigning to the property itself
        # would recurse forever.
        self._fit_time = value

    # ------------------------------------------------------------------
    # Day of break up
    # ------------------------------------------------------------------
    @property
    def day_of_year(self):
        """Day of the year of every break up, as a list of ints."""
        return self.date_time.dt.dayofyear.tolist()

    @property
    def year(self):
        """Year of every break up."""
        return self.date_time.dt.year

    @property
    def fit_day_of_year(self):
        """Fit (as returned by `polyfit`) used for the day of year, or None."""
        return self._fit_day_of_year

    @fit_day_of_year.setter
    def fit_day_of_year(self, value):
        self._fit_day_of_year = value

    # ------------------------------------------------------------------
    # Predictions
    # ------------------------------------------------------------------
    @property
    def predicted_day_of_break_up(self):
        """Predicted day of year; raises ValueError if none is available."""
        if self._predicted_day_of_break_up is None:
            raise ValueError(" Predicton of day of break up has not been made")
        return self._predicted_day_of_break_up

    @predicted_day_of_break_up.setter
    def predicted_day_of_break_up(self, value):
        self._predicted_day_of_break_up = value

    @property
    def predicted_time_of_break_up(self):
        """Predicted time of day; raises ValueError if none is available."""
        if self._predicted_time_of_break_up is None:
            raise ValueError(" Predicton of time of break up has not been made")
        return self._predicted_time_of_break_up

    @predicted_time_of_break_up.setter
    def predicted_time_of_break_up(self, value):
        self._predicted_time_of_break_up = value

    # Earlier versions exposed the setter under this misspelled name only;
    # keep it as an alias so existing assignments continue to work.
    predicted_dtime_of_break_up = predicted_time_of_break_up

    @property
    def prediction(self):
        """Predicted date and time; raises ValueError if none is available."""
        if self._prediction is None:
            raise ValueError(" Predicton of  date and time of break up has not been made")
        return self._prediction

    @prediction.setter
    def prediction(self, value):
        self._prediction = value

    # ------------------------------------------------------------------
    # Methods
    # ------------------------------------------------------------------
    def polyfit(self, x_property, y_property, degree, norm_order=2, print_eq=True):
        """ Fit polynomial function to properties of object

        Args:
            x_property : name of the property used as independent variable
            y_property : name of the property used as dependent variable;
                'time' is replaced by 'decimal_time' so the fit is numeric
            degree (_int_) : degree of polynomial
            norm_order (_int_) : order of the norm of the residuals, Default=2
            print_eq (_bool_) : determines if the equation of the fitted polynomial is printed

        Prints:
            The fitted polynomial (only if `print_eq` is True)
        Returns:
            dict : dictionary with the fitted polynomial ('Poly fit coefficients'),
            the names of the variables used for the fit ('(x,y)=') and the
            goodness of fit metrics ('gofs metrics'). A metric that is
            undefined for the data (constant y, or as many coefficients as
            points) is reported as NaN.
        """
        if y_property == 'time':  # we want to use decimal time for the fit
            y_property = 'decimal_time'

        # Plain float arrays keep the arithmetic below independent of whether
        # the property returns a list or a pandas Series.
        x = np.asarray(getattr(self, x_property), dtype=float)
        y = np.asarray(getattr(self, y_property), dtype=float)

        polynomial = np.poly1d(np.polyfit(x, y, degree))

        if print_eq:
            print(polynomial)

        # Goodness of fit
        residuals = y - polynomial(x)
        norm = np.linalg.norm(residuals, norm_order)

        # These metrics are not generalized for higher order norms, they are
        # simply the traditional (least squares) ones.
        nan = float('nan')
        ss_res = np.sum(residuals ** 2)
        ss_tot = np.sum((y - np.mean(y)) ** 2)
        r2 = 1 - ss_res / ss_tot if ss_tot > 0 else nan

        rmse = np.sqrt(np.mean(residuals ** 2))
        y_range = np.max(y) - np.min(y)
        nrmse = rmse / y_range if y_range > 0 else nan

        n = len(y)   # number of points
        k = degree   # number of estimated coefficients (besides the intercept)
        dof = n - k - 1
        # The adjusted R2 needs at least one residual degree of freedom.
        R2 = 1 - ((1 - r2) * (n - 1)) / dof if dof > 0 else nan

        gofs = {
            f'{norm_order:}th norm': round(norm, 4),
            'r2': round(r2, 4),
            'R2': round(R2, 4),
            'RMSE': round(rmse, 4),
            'normalized RMSE': round(nrmse, 4),
        }

        return {
            'Poly fit coefficients': polynomial,
            '(x,y)=': [x_property, y_property],
            'gofs metrics': gofs,
        }

    def predict(self, variable, new_x):
        """
        Use the fit stored in `fit_<variable>` to predict y for a new value of x

        Args
            variable (_str_): what are we predicting? e.g. 'day_of_year' or 'time'
            new_x (_int_): value used to predict
        Return
            dict with the variable names of the fit ('(x,y)'), the value used
            ('x_hat') and the predicted value rounded to 4 decimals ('y_hat')
        Raises
            AttributeError: if `variable` is not a property of the model or
                no fit is stored for it
        """
        self.check_property(variable)

        fit = getattr(self, 'fit_' + str(variable), None)
        if fit is None:
            raise AttributeError(f"Varieble '{variable}' is not part of the predicted variables")

        predicted_y = fit['Poly fit coefficients'](new_x)
        return {'(x,y)': fit['(x,y)='], 'x_hat': new_x, 'y_hat': np.round(predicted_y, 4)}

    def check_property(self, prop_name):
        """
        Simple method that checks that the variable is an attribute of the model

        Returns True when it is, raises AttributeError otherwise.
        """
        if not hasattr(self, prop_name):
            raise AttributeError(f'variable "{prop_name}" not part of the model')
        return True

    def get_predicted_day(self, year):
        """
        Compute and store the predicted day of break up from `.fit_day_of_year`

        Args:
            year(_int_): year for which the prediction will be made
        Raises:
            ValueError: if no fit is associated to the day of year
        Return:
            predicted day of break up (day of year), also stored in
            `.predicted_day_of_break_up`
        """
        if self._fit_day_of_year is None:
            raise ValueError("No fit for the day of break up has been made or assigned")

        self._predicted_day_of_break_up = self.predict('day_of_year', year)['y_hat']
        return self._predicted_day_of_break_up

    def get_predicted_time(self, year):
        """
        Compute and store the predicted time of break up from `.fit_time`

        Args:
            year(_int_): year for which the prediction will be made
        Raises:
            ValueError: if no fit is associated to the time
        Returns:
            predicted time of break up (transformed from decimal hours to a
            clock time), also stored in `.predicted_time_of_break_up`
        """
        if self._fit_time is None:
            raise ValueError("No fit for the time of break up has been made or assigned")

        # The fit works in decimal hours; only the predicted number is converted.
        decimal_hours = self.predict('time', year)['y_hat']
        self._predicted_time_of_break_up = decimal_time(decimal_hours, direction='to_hexadecimal')
        return self._predicted_time_of_break_up

    def get_prediction(self, year):
        """
        Get the predicted date (day of year) and time associated with the year

        When a fit is available the corresponding value is recomputed so it
        matches the latest fit; otherwise the value assigned manually through
        `.predicted_day_of_break_up` / `.predicted_time_of_break_up` is used.

        Args:
            year (_int_): year for which the prediction will be made
        Raises:
            ValueError: if neither a fit nor a manually assigned value exists
                for the day or the time, or if the predicted day does not
                fall inside `year`
        Returns:
            pandas Timestamp with the predicted date and time, also stored in
            `.prediction`
        """
        if self._fit_day_of_year is not None:
            day = self.get_predicted_day(year)
        else:
            day = self.predicted_day_of_break_up

        if self._fit_time is not None:
            clock = self.get_predicted_time(year)
        else:
            clock = self.predicted_time_of_break_up

        year = int(year)
        day_number = int(round(float(day)))
        # Day 1 is the first of January, hence the offset of day_number - 1.
        moment = pd.Timestamp(year=year, month=1, day=1) + pd.Timedelta(
            days=day_number - 1, hours=clock.hour, minutes=clock.minute
        )
        if day_number < 1 or moment.year != year:
            raise ValueError(f"Predicted day of year {day_number} is not a valid day of {year}")

        self.prediction = moment
        return moment

    
    
    
    

## OOP  & Simple models for ice break up
An effective way to create and use Python models is an Object-Oriented Programming approach. In our case we will use the class `IceModel` to construct different models by instantiating objects of the class.

#### Preprocessing
In part 1 of this interactive textbook you familiarized yourself with the **Nenana Ice Classic**, were introduced to a `DataFrame` containing environmental variables that may help to predict the break up, and learned some basic preprocessing techniques.

For the moment, let us consider only the data associated with the past break up dates, information that is stored in `'../data/BreakUpTimes.csv'`

In [4]:
# Read the historic break up records into a DataFrame.
# NOTE(review): the accompanying text mentions '../data/BreakUpTimes.csv' and the
# recorded run of this cell ended in FileNotFoundError - confirm the relative path.
ice_data = pd.read_csv(filepath_or_buffer='../../data/BreakUpTimes.csv')

FileNotFoundError: [Errno 2] No such file or directory: '../../data/BreakUpTimes.csv'