In [1]:
! python -V
! ls .

Python 2.7.17
 __init__.py	       upload_from_csv.py	 vericast_reports.py
'Programs API.ipynb'   uploading_epg_csv.ipynb	 Vericast-reports-tool.ipynb


In [2]:
import os
import json
import pandas as pd
import datetime
import dateutil.parser
         

# Change directory to filesdir
os.chdir('../../data/')
class VericastMatchReporter:
    """
    @params: filename, start_date, start_time, end_date, end_time, time_zone
    """
    filename = None
    channel = None
    report_name = 'vericast-api-matches'
    report_file_extension = 'xslx'
    df = pd.DataFrame(columns=['title','length','album','artist','start_time_utc'])
    matches_between_dates = None
    start_time = None
    end_time = None
    dfeng = None
    
    
    def __init__(self, **kwargs):
        """Initilize report, make a pandas dataframe with the matches."""
        
        self.filename = kwargs['filename']
        self.channel = self.filename.split('/')[-1].split('-')[-1].split('.')[0]
        self.report_name += '-' + self.channel 
        try:
            # Make a df with engineer's times and bring them to UTC for correct comparison
            start_time_naive = dateutil.parser.parse(kwargs['start_date']+'T'+kwargs['start_time'])
            end_time_naive = dateutil.parser.parse(kwargs['end_date']+'T'+kwargs['end_time'])
            dfeng = pd.DataFrame({'naive_datetime':[start_time_naive,end_time_naive]}, index=['filter_start','filter_end'])
            dfeng['local_datetime'] = pd.DatetimeIndex(dfeng['naive_datetime']).tz_localize(tz =kwargs['time_zone'])
            dfeng['utc_datetime'] = pd.DatetimeIndex(dfeng['local_datetime']).tz_convert(tz ='UTC')
            self.dfeng = dfeng
        except:
            raise
        with open(self.filename) as f:
            for line in f.readlines():
                # Append line into df
                self.df = self.df.append(json.loads(line), ignore_index=True)
                
        # Cast dates as aware datetime with timezone 'UTC'
        self.df['start_time_utc'] = pd.to_datetime(self.df['start_time_utc'], yearfirst=True, utc=True)
        print('Reporter initialized.')
        
    def create_report_between_times_xlsx(self):
        # Localize UTC for datetime filtering
        self.df = self.df.set_index('start_time_utc')
        # TODO create the matches
        # matches = self.df.loc[(self.df['start_time_utc'] > self.start_time) & (self.df['start_time_utc'] < self.end_time)]
        # matches = self.df.between_time(self.start_time, self.end_time)
        print('File {} with {} matches has been created.'.format(self.report_name, self.matches_between_dates.count()))
        return self.matches_between_dates
    
    def __str__(self):
        return 'Vericast API matches from '+self.channel.replace('_', ' ').upper()+' between times '+str(self.start_time)+' and '+str(self.end_time)+' UTC.'

    def __repr__(self):
        return "<{}: Channel {}>".format(self.__class__.__name__, self.channel)

In [10]:
# Create a reporter instance with engineer's data
report = VericastMatchReporter(
    filename='matches-la_sexta.json', 
    start_date='2018-01-29', 
    start_time='12:00', 
    end_date='2018-02-01',
    end_time='00:00',
    time_zone='Europe/Madrid'
)

Reporter initialized.


In [16]:
# show the dfeng created by reporter from engineer's data
report.dfeng.head(2)

Unnamed: 0,naive_datetime,local_datetime,utc_datetime
filter_start,2018-01-29 12:00:00,2018-01-29 12:00:00+01:00,2018-01-29 11:00:00+00:00
filter_end,2018-02-01 00:00:00,2018-02-01 00:00:00+01:00,2018-01-31 23:00:00+00:00


In [15]:
# show the df created by reporter from file
report.df.sample(10).sort_values(by=['start_time_utc'])

Unnamed: 0,title,length,album,artist,start_time_utc
470,Eye Spy,17,Vol. 8: Spy Themes Remixed,"Chuck Cassey, Jules Bromley",2018-01-25 17:01:05+00:00
436,I Like You (instrumental),14,"Offbeat, Cute & Quirky - Instrumentals",Phil Buckle,2018-01-27 00:41:17+00:00
426,Those Three Words,13,Abstract Future - Let You In,"Andrew Haas, Ian Franzino, Laura Lane",2018-01-27 08:12:07+00:00
392,Dangerous Search,8,DramaLand,"Alec Williams , Andy Asfendis",2018-01-28 12:17:42+00:00
333,Never Get Enough,19,Stacks of Soul Wax,"Marc Ferrari, Michael McGregor",2018-01-31 17:09:07+00:00
303,Rainbow Odyssey,7,Electro Spectrum,"Josselin Bordat, Le Fat Club",2018-02-01 14:34:11+00:00
286,Walk The Walk,4,ST172 From The Archives Vol. 9 - Retro Electronic,Taso Kotsos,2018-02-02 05:21:20+00:00
241,Gatsby's Groove,10,Sunny Side Up,"Andy Blythe, Marten Joustra",2018-02-04 10:24:09+00:00
18,Gatsby's Groove,8,Sunny Side Up,"Andy Blythe, Marten Joustra",2018-02-15 15:54:17+00:00
7,One Way,4,Rap & Hip Hop Vol. 1,"Nadin Polanco, Robert Ayala",2018-02-16 00:11:40+00:00


In [None]:
# Set df's index to be time series
df = report.df.set_index('start_time_utc')

# show sample of time indexed and sorted df
df.sample(10).sort_values(by=['start_time_utc'])

In [None]:
df.loc[df. index['#'], 'NAME']

In [20]:
# Make a mask boolean mask between dates
mask = (report.df['start_time_utc'] >= report.dfeng.loc['filter_start']['utc_datetime']]) & (report.df['start_time_utc'] <= report.dfeng['filter_end']['utc_datetime'])
matches_between_dates = report.df.loc[mask]

KeyError: 'filter_start'

In [None]:
# From the engineers I get this
kwargs = {}
kwargs['filename']='matches-la_sexta.json'
kwargs['start_date']='2018-02-09'
kwargs['start_time']='22:33'
kwargs['end_date']='2018-02-14'
kwargs['end_time']='00:01'
kwargs['time_zone']='Europe/Madrid'
print(kwargs.items())


In [None]:
start_time_naive = dateutil.parser.parse(kwargs['start_date']+'T'+kwargs['start_time'])
end_time_naive = dateutil.parser.parse(kwargs['end_date']+'T'+kwargs['end_time'])

dfeng = pd.DataFrame({'naive_datetime':[start_time_naive,end_time_naive]}, index=['start','end'])
dfeng['local_datetime'] = pd.DatetimeIndex(dfeng['naive_datetime']).tz_localize(tz =kwargs['time_zone'])
dfeng['utc_datetime'] = pd.DatetimeIndex(dfeng['local_datetime']).tz_convert(tz ='UTC')

dfeng

In [None]:

# Make engineer's time format same as filename datetime format (localize and convert to UTC)
user_start_time='{}T{}.000000'.format(kwargs['start_date'],kwargs['start_time']+':00')
# local & unaware
start_time_local = datetime.datetime.strptime(user_start_time,'%Y-%m-%dT%H:%M:%f.000000')
# localize by adding timezone
print(user_start_time)
print(start_time_local)

In [None]:
dfeng

In [None]:
dfeng = pd.DataFrame(columns=['naive_datetime','local_datetime','utc_datetime'])

start_time_naive = dateutil.parser.parse(kwargs['start_date']+'T'+kwargs['start_time'])
end_time_naive = dateutil.parser.parse(kwargs['end_date']+'T'+kwargs['end_time'])

dfeng = dfeng.append({'naive_datetime':start_time_naive},ignore_index=True)
# dfeng = dfeng.set_index('naive_datetime')
dfeng

<hr>

The time value a:
- returned by:
    - `gmtime()`
    - `localtime()`
    - `strptime()`
- accepted by:
    - `asctime()`
    - `mktime()`
    - `strftime()`
    
is a sequence of 9 integers.

Return values of:

    - `gmtime()`
    - `localtime()`
    - `strptime()`
    
also offer attribute names for individual fields.

# Goals:
- Localize & convert to UTC engineers time.
```python
start_date='2018-01-29', 
start_time='12:00', 
end_date='2018-02-01',
end_time='00:00:00',
tz_offset='Europe/Madrid'

# Unaware
user_start_time='%sT%s00.000000'.format(start_date,start_time)
```


- And I want start_time_utc in epoch too

<hr>

## Engineer input