This script:
- reads in the **acoustic data**
    - explodes each record into one row per day between its deploy and recovery dates
- makes a **test date range** (Jan 1 - Feb 28, 2011)
    - left joins the cleaned acoustic data (fewer columns) to the test date range
        --> **acou_JanFeb**
- creates **fake prey data**
    - left joins the **fake prey data** to **acou_JanFeb**
        --> **CCB_JanFeb**
- makes some plots of Jan/Feb
- attempts to create a new column that describes data availability/overlap

In [None]:
import pandas as pd
import altair as alt
import os
import numpy as np

In [None]:
# Work from the project folder so the relative './Data/...' path used below resolves.
project_dir = '/Users/cristiana/Documents/Duke/MP/Python'
os.chdir(project_dir)

In [None]:
# Load the CCB acoustic metadata; the deploy and recovery columns are parsed
# as dates while the file is read.
date_columns = ['deployDate', 'recoveryDate']
ccb = pd.read_csv(
    './Data/CCB Metadata.csv',
    sep=',',
    encoding='utf-8',
    parse_dates=date_columns,
)

# Preview the first rows.
ccb.head()

In [None]:
def _days_deployed(row):
    """Return the daily date range from a row's deployDate to its recoveryDate."""
    return pd.date_range(row['deployDate'], row['recoveryDate'], freq='D')


# Attach each row's span of days, then explode so that every day gets its own row.
ccb['between_days'] = ccb.apply(_days_deployed, axis=1)
ccb_days = ccb.explode('between_days')
ccb_days

### Create new dataframe with test date range (Jan-Feb of 2011)

In [None]:
# Build a one-column calendar (one row per day) for the test window;
# Jan-Feb 2011 stands in for the full study period.
start_date, end_date = '2011-01-01', '2011-02-28'

test_days = pd.date_range(start_date, end_date)
JanFeb2011 = pd.DataFrame({'between_days': test_days})
JanFeb2011.info()

In [None]:
# Left-join the per-day acoustic rows onto the Jan-Feb calendar, so every
# calendar day is kept even when no acoustic row matches it.
JanFeb_ccb = pd.merge(JanFeb2011, ccb_days, how='left', on='between_days')
JanFeb_ccb

In [None]:
#pd.DataFrame.to_csv(JanFeb_ccb, "./scratch/JanFeb_ccb.csv")

### Create example prey data

In [None]:
# Build placeholder prey data: one row per day from Jan 03 to Feb 03, 2011,
# each carrying the same constant Transect value.
prey_count = 40
prey_start, prey_end = '2011-01-03', '2011-02-03'

prey_days = pd.date_range(prey_start, prey_end)
prey_columns = {'Transect': prey_count, 'between_days': prey_days}
prey = pd.DataFrame(prey_columns)
prey.head()

### Clean acoustic data

In [None]:
# Preview the exploded (one row per day) acoustic data.
ccb_days.head()

In [None]:
# Trim the acoustic data to the two columns used downstream:
# the unit ID and the per-day date.
essential_columns = ['c_uniqueUnitID', 'between_days']
Acoustic_clean = ccb_days[essential_columns]
Acoustic_clean

In [None]:
# Left-join the trimmed acoustic data onto the Jan-Feb calendar; calendar
# days with no acoustic row are kept with missing values.
acou_JanFeb = pd.merge(JanFeb2011, Acoustic_clean, how='left', on='between_days')
acou_JanFeb

In [None]:
# Add the fake prey data to the acoustic/calendar table with another left
# join on the day, so days without prey data get a missing Transect value.
CCB_JanFeb = pd.merge(acou_JanFeb, prey, how='left', on='between_days')
CCB_JanFeb

In [None]:
# Check the dtype of each column in the joined table.
CCB_JanFeb.dtypes

In [None]:
# Display the unit IDs converted to strings.
# NOTE(review): the converted Series is not assigned back, so the column in
# CCB_JanFeb itself is left unchanged by this cell.
CCB_JanFeb['c_uniqueUnitID'].astype('str')

In [None]:
# Print a summary (columns, non-null counts, dtypes) of the joined table.
CCB_JanFeb.info()

In [None]:
# Inspect the Python type of the unit ID stored at index label 1.
type(CCB_JanFeb['c_uniqueUnitID'][1])

### Try to list data availability/overlap in one column

In [None]:
# Display the Transect (prey) column of the joined table.
CCB_JanFeb['Transect']

In [None]:
# Print every Transect value alongside its Python type.
for transect_value in CCB_JanFeb['Transect']:
    print(transect_value, type(transect_value))

In [None]:
# Flag, per row, whether prey (Transect) data exist for that day.
# Rows without prey data hold a float NaN after the left join; NaN never
# compares equal to the string 'nan', so missingness is tested with isna().
prey_missing = CCB_JanFeb['Transect'].isna()
CCB_JanFeb['DataAvail'] = np.where(prey_missing, 'No', 'Yes')
CCB_JanFeb

In [None]:
# Label each row by which data sources are present on that day.
# Missing values from the left joins are real NaN, not the string 'NaN',
# so presence is tested with notna() rather than string comparison.
has_acoustic = CCB_JanFeb['c_uniqueUnitID'].notna()
has_prey = CCB_JanFeb['Transect'].notna()

# create conditions (order matches `values` below)
conditions = [
    ~has_acoustic & ~has_prey,
    has_acoustic & has_prey,
    ~has_acoustic & has_prey,
    has_acoustic & ~has_prey,
]

# create resulting values for conditions
values = ['No Data', 'Both', 'Prey', 'Acoustic']

# create column to fill
# The four conditions are exhaustive, so the default is never selected; it is
# given as a string because np.select's implicit default (0) does not share a
# dtype with the string choices.
CCB_JanFeb['DataAvailability'] = np.select(conditions, values, default='Unknown')

CCB_JanFeb

In [None]:
# NOTE(review): this cell appears to be a generic np.select reference example.
# `df` is not defined in the part of the notebook visible here, so running it
# as-is would raise NameError; it also rebinds `conditions` and `values` from
# the previous cell. Confirm whether it should be kept.

# create a list of our conditions
conditions = [
    (df['likes_count'] <= 2),
    (df['likes_count'] > 2) & (df['likes_count'] <= 9),
    (df['likes_count'] > 9) & (df['likes_count'] <= 15),
    (df['likes_count'] > 15)
    ]

# create a list of the values we want to assign for each condition
values = ['tier_4', 'tier_3', 'tier_2', 'tier_1']

# create a new column and use np.select to assign values to it using our lists as arguments
df['tier'] = np.select(conditions, values)

# display updated DataFrame
df.head()

### Plahts

In [None]:
# Strip chart of the Jan/Feb days: one rectangle per day (month-date on x),
# colored by the Transect value, faceted into one row per month.
prey_strip = alt.Chart(CCB_JanFeb).mark_rect()
prey_strip = prey_strip.encode(
    x=alt.X('between_days:T', timeUnit='monthdate'),
    color='Transect:N',
)
prey_strip = prey_strip.properties(width=500, height=50)
prey_strip.facet(row='month(between_days)')



In [None]:
# Strip chart of the Jan/Feb days: one rectangle per day-of-month (ordinal x),
# colored by the DataAvailability label, faceted into one row per month.
availability_strip = alt.Chart(CCB_JanFeb).mark_rect()
availability_strip = availability_strip.encode(
    x=alt.X('date(between_days):O'),
    color='DataAvailability:N',
)
availability_strip = availability_strip.properties(width=500, height=50)
availability_strip.facet(row='month(between_days)')

