In [None]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
# Report the library versions in use, one per line, in a single print call.
print('pandas version: {}'.format(pd.__version__),
      'numpy version: {}'.format(np.__version__),
      'matplotlib version: {}'.format(mpl.__version__),
      sep='\n')

## Pandas Tutorials
1. [10 minutes to pandas](https://pandas.pydata.org/pandas-docs/stable/getting_started/10min.html#min)
2. [What does axis in pandas mean?](https://stackoverflow.com/questions/22149584/what-does-axis-in-pandas-mean)

In [None]:
# A six-element Series that contains one np.nan entry.
s = pd.Series([1, 3, 5, np.nan, 6, 8])
print(s)

# 13 periods starting at '20191213'; used as the row index of df below.
dates = pd.date_range('20191213', periods=13)
print(dates)

# One row of four random-normal values per date, columns labelled A-D.
df = pd.DataFrame(
    np.random.randn(len(dates), 4),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)
print(df)

# Frame built from a dict that mixes scalars, a Timestamp, a Series,
# a NumPy array and a Categorical.
mixed_columns = {
    'A': 1.,
    'B': pd.Timestamp('20191213'),
    'C': pd.Series(1, index=range(4), dtype='float32'),
    'D': np.full(4, 5, dtype='int32'),
    'E': pd.Categorical(['app', 'iot', 'connectivity', 'embedded']),
    'F': 'foo',
}
df2 = pd.DataFrame(mixed_columns)
print(df2)
print(df2.dtypes)

In [None]:
# Tour of viewing, sorting and selection calls on df / df2.
# All of this is one notebook cell, so only the value of the final expression
# is displayed; the earlier expressions are evaluated and their results dropped.

# --- Viewing ---
# Rows from the start of the frame, then the last 3 rows.
df.head()
df.tail(3)
# print(df.index)
# print(df.columns)
# Underlying data as NumPy arrays (df2 mixes several column types).
df.to_numpy()
df2.to_numpy()
# Summary statistics, then the transposed frame.
df.describe()
df.T

# --- Sorting ---
# print(df.sort_index(axis = 0, ascending = False))
# Sort along axis 1 (the column labels) in descending order.
df.sort_index(axis = 1, ascending = False)
# Sort rows by the values in column 'B'.
df.sort_values(by = 'B')

# --- Selection with plain [] ---
# A single column by label.
df['C']
# Row slices: by position, then by date strings against the DatetimeIndex.
df[0:3]
df['20191214':'20191218']

# --- Selection by label (.loc) ---
# The row for the first date in `dates`.
df.loc[dates[0]]
# All rows, columns 'A' and 'C'.
df.loc[:, ['A', 'C']]
# A date-string row slice restricted to columns 'A' and 'B'.
df.loc['20191215':'20191220', ['A', 'B']]
# One cell: row label '20191224', column 'C'.
df.loc['20191224', 'C']

# --- Selection by position (.iloc / .iat) ---
df.iloc[3]
df.iloc[4:9, 0:2]
df.iloc[[3, 6, 9], [0, 1]]
df.iloc[2:4, :]
df.iloc[:, 1:2]
# The same single cell addressed through .iloc and through .iat.
df.iloc[1, 1]
df.iat[1, 1]

# --- Boolean indexing ---
# Rows selected by a condition on column A.
df[df.A > 0]
# Element-wise boolean mask over the whole frame.
df[df > 0]
# Work on an explicit copy of the first six rows so df itself is not modified,
# replace column 'D' with strings, then keep the rows whose 'D' is in the given
# list ('four' does not occur in the column).
df3 = df.iloc[0:6, :].copy()
df3['D'] = ['one', 'one', 'two', 'three', 'five', 'right']
df3[df3['D'].isin(['two', 'four'])]

In [None]:
# List the entries of the current working directory, then print its path.
import os

cwd_entries = os.listdir()
cwd_path = os.getcwd()
print(cwd_entries)
print(cwd_path)
# cannot find TZ
# print(os.environ['TZ'])

In [None]:
# Read the class-summary CSV, remove four 'textBox*' columns in place,
# and finish the cell by displaying the trimmed frame.
classdata = pd.read_csv('ClassSummaryMaster-All-010418-4PM.CSV')

for unwanted in ('textBox35', 'textBox5', 'textBox37', 'textBox38'):
    del classdata[unwanted]

classdata

## Switch Columns
Follow [How to change the order of DataFrame columns?](https://stackoverflow.com/questions/13148429/how-to-change-the-order-of-dataframe-columns) and [Renaming columns in pandas](https://stackoverflow.com/questions/11346283/renaming-columns-in-pandas)

Also [Pandas writing dataframe to CSV file](https://stackoverflow.com/questions/16923281/pandas-writing-dataframe-to-csv-file) and [How to avoid Python/Pandas creating an index in a saved csv?](https://stackoverflow.com/questions/20845213/how-to-avoid-python-pandas-creating-an-index-in-a-saved-csv)

In [None]:
# Derive the new column order from the current one (printed before and after).
cols = classdata.columns.tolist()
print(cols)

# Pass 1: the third-from-last column moves to the front; everything else
# keeps its relative order.
moved_first, leading, trailing = cols[-3:-2], cols[:-3], cols[-2:]
cols = moved_first + leading + trailing

# Pass 2: the second-from-last column is slotted in at position 7, and the
# final column stays last.
head, moved_mid, middle, last = cols[:7], cols[-2:-1], cols[7:-2], cols[-1:]
cols = head + moved_mid + middle + last
print(cols)

In [None]:
# Materialise the reordered selection as an independent frame.  Without the
# explicit copy, classdataFixed is a slice of classdata, and the later cells
# that add or set columns on it (e.g. classdataFixed['Factor'] = 0) trigger
# pandas' SettingWithCopyWarning.
classdataFixed = classdata[cols].copy()

# Replace the source headers with readable names.  The list has to supply
# exactly one name per selected column, in the order given by `cols`.
classdataFixed.columns = [
    'Class Code',
    'Room',
    'Min',
    'Max',
    'Pending',
    'Enrolled',
    'Waitlisted',
    'Open',
    'Start Date',
    'End Date',
    'Start Time',
    'End Time',
    'Days',
    'Adjudicator',
    'Status',
]
classdataFixed

In [None]:
# Write the cleaned frame to CSV; index=False keeps the row index out of the file.
output_csv = 'ClassSummaryMaster-Piano-12282017-2PM.CSV'
classdataFixed.to_csv(output_csv, index=False)

In [None]:
# Demo: cut a 125-row frame into consecutive 25-row chunks, then split every
# chunk into 5 interleaved groups (positions 0,5,10,... / 1,6,11,... / ...).
dataset = pd.DataFrame(np.random.randint(0, 50, size=(125, 2)), columns=('DT', 'val'))
# drop=True discards the old index directly, instead of materialising it as an
# 'index' column and deleting that column afterwards.
d1 = dataset.reset_index(drop=True)

# Chunk id = row position // 25.
d = {name: group for name, group in d1.groupby(np.arange(len(d1)) // 25)}
for chunk_id in sorted(d):
    chunk = d[chunk_id]
    # Group id = position inside the chunk % 5.
    strides = {name: group
               for name, group in chunk.groupby(np.arange(len(chunk)) % 5)}
    # The inner loop uses its own variable names so it no longer rebinds the
    # outer loop's variables, and the inner dict no longer overwrites the
    # notebook-level name `s`.
    for stride_id in sorted(strides):
        stride = strides[stride_id]
        print(stride_id, stride)
        # Last 'val' of the group just printed.
        last_var = stride['val'].iloc[-1]

In [None]:
# Import only the two names this cell uses.  The previous wildcard imports
# (`from datetime import *`, `from dateutil.tz import *`) put every public name
# of both modules into the notebook namespace.
from datetime import datetime
from dateutil.tz import tzlocal
# Earlier variants, written for the `import datetime` spelling; kept for reference.
# datetime.date.today().strftime("%B %d, %Y")
# datetime.datetime.now().strftime("%I:%M%p on %B %d, %Y")
# Current time in the local time zone, formatted as YYYYMMDD-HHMM-<zone name>.
timestamp = datetime.now(tzlocal()).strftime("%Y%m%d-%H%M-%Z")
timestamp

In [None]:
# Load the course-factor table (later cells read its 'CourseCode' and
# 'ScheduleFactor' columns).
# * `sheet_name` is the keyword pandas accepts; the old `sheetname` spelling
#   was deprecated and then removed, so it fails on current pandas.
# * Passing the path lets pandas open and close the workbook itself instead of
#   leaving the handle from a bare open() unclosed.
dfx = pd.read_excel('CourseFactorTable.xlsx', sheet_name='Sheet1')
dfx

In [None]:
# Add the 'Factor' and 'Time' columns, both starting at 0 for every row,
# then display the frame.
for placeholder in ('Factor', 'Time'):
    classdataFixed[placeholder] = 0
classdataFixed

In [None]:
# Spot check: rows of the factor table whose CourseCode is either the full
# code 'Artists01.A' or the shorter 'Artists01'.
course_codes = dfx['CourseCode']
is_artists01 = (course_codes == 'Artists01.A') | (course_codes == 'Artists01')
r = dfx.loc[is_artists01]
r

In [None]:
def _lookup_factor(class_code):
    """Return the ScheduleFactor for one class code, or 0 without a unique match.

    A row of dfx matches when its CourseCode equals the class code itself or
    the class code with its last two characters removed.  The factor is used
    only when exactly one row matches.
    """
    hits = dfx.loc[(dfx['CourseCode'] == class_code) |
                   (dfx['CourseCode'] == class_code[:-2]), 'ScheduleFactor']
    return hits.iloc[0] if len(hits) == 1 else 0


# The previous form compared dfx['CourseCode'] directly against
# classdataFixed['Class Code'] (two Series of different lengths), sliced rows
# with [:-2] where it meant to trim characters, and assigned a whole frame to
# a single column.  Look the factor up once per class code instead.
classdataFixed['Factor'] = classdataFixed['Class Code'].map(_lookup_factor)

In [None]:
# Fill 'Factor' and 'Time' row by row from the course-factor table.
for i, row in classdataFixed.iterrows():
    # print(row['Class Code'])
    class_code = row['Class Code']
    # A factor row matches on the full class code or on the code with its last
    # two characters removed.
    matches = dfx[(dfx['CourseCode'] == class_code) |
                  (dfx['CourseCode'] == class_code[:-2])]
    # Only an unambiguous (exactly one) match is used; any other row keeps its
    # current Factor/Time values.
    if len(matches.index) == 1:
        factor = matches.iloc[0]['ScheduleFactor']
        # DataFrame.set_value was removed in pandas 1.0; .at is the label-based
        # scalar setter that replaces it.
        classdataFixed.at[i, 'Factor'] = factor
        # Time = factor * (enrolled + pending).
        classdataFixed.at[i, 'Time'] = factor * (row['Enrolled'] + row['Pending'])

In [None]:
# Display classdataFixed to inspect its current contents.
classdataFixed

In [None]:
# Move the last two columns so they sit directly after the first two; the
# remaining columns keep their relative order.
cols = classdataFixed.columns.tolist()
first_two, middle, last_two = cols[:2], cols[2:-2], cols[-2:]
cols = first_two + last_two + middle
classdataFixed = classdataFixed[cols]
classdataFixed

example from [Sorting list according to other list in python not working when second list contains numpy arrays](https://stackoverflow.com/questions/48156392/sorting-list-according-to-other-list-in-python-not-working-when-second-list-cont)

In [None]:
# Pairs whose first elements tie (both 1) and whose second elements are arrays.
a = [np.array(values) for values in ([4, 5], [3, 4])]
b = [1] * 2
e = zip(b, a)
# Sorting the pairs without a key was recorded as failing with:
# ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
# sorted(e)
# Sorting on the first element only keeps the arrays out of the comparison.
sorted(e, key=lambda pair: pair[0])

In [None]:
# NOTE(review): this cell repeats, statement for statement, a cell that appears
# earlier in this notebook; consider keeping only one of them.
# Demo: cut a 125-row frame into consecutive 25-row chunks, then split every
# chunk into 5 interleaved groups (positions 0,5,10,... / 1,6,11,... / ...).
dataset = pd.DataFrame(np.random.randint(0, 50, size=(125, 2)), columns=('DT', 'val'))
# drop=True discards the old index directly, instead of materialising it as an
# 'index' column and deleting that column afterwards.
d1 = dataset.reset_index(drop=True)

# Chunk id = row position // 25.
d = {name: group for name, group in d1.groupby(np.arange(len(d1)) // 25)}
for chunk_id in sorted(d):
    chunk = d[chunk_id]
    # Group id = position inside the chunk % 5.
    strides = {name: group
               for name, group in chunk.groupby(np.arange(len(chunk)) % 5)}
    # The inner loop uses its own variable names so it no longer rebinds the
    # outer loop's variables, and the inner dict no longer overwrites the
    # notebook-level name `s`.
    for stride_id in sorted(strides):
        stride = strides[stride_id]
        print(stride_id, stride)
        # Last 'val' of the group just printed.
        last_var = stride['val'].iloc[-1]