In [None]:
import pandas as pd
import numpy as np

#### pandas.read_csv(filepath_or_buffer, sep=',', delimiter=None, header='infer', names=None, index_col=None, usecols=None, squeeze=False, prefix=None, mangle_dupe_cols=True, dtype=None, engine=None, converters=None, true_values=None, false_values=None, skipinitialspace=False, skiprows=None, nrows=None, na_values=None, keep_default_na=True, na_filter=True, verbose=False, skip_blank_lines=True, parse_dates=False, infer_datetime_format=False, keep_date_col=False, date_parser=None, dayfirst=False, iterator=False, chunksize=None, compression='infer', thousands=None, decimal=b'.', lineterminator=None, quotechar='"', quoting=0, escapechar=None, comment=None, encoding=None, dialect=None, tupleize_cols=None, error_bad_lines=True, warn_bad_lines=True, skipfooter=0, doublequote=True, delim_whitespace=False, low_memory=True, memory_map=False, float_precision=None)
Read a CSV (comma-separated values) file into a DataFrame.

In [None]:
# Load 'tips.csv' from the working directory into a DataFrame.
tips = pd.read_csv(filepath_or_buffer='tips.csv')

# Preview the first five rows.
tips.head(5)

In [None]:
# Tip as a percentage of the total bill, rounded to one decimal place.
tips['tip%'] = (tips['tip'] / tips['total_bill'] * 100).round(1)

# Preview the frame with the new column.
tips.head(5)

#### DataFrame.to_csv(path_or_buf=None, sep=',', na_rep='', float_format=None, columns=None, header=True, index=True, index_label=None, mode='w', encoding=None, compression=None, quoting=None, quotechar='"', line_terminator='\n', chunksize=None, tupleize_cols=None, date_format=None, doublequote=True, escapechar=None, decimal='.')
Write a DataFrame to a comma-separated values (CSV) file.

In [None]:
# Save the frame (including the added 'tip%' column) with the default options,
# so the row index is written as the first column.
tips.to_csv(path_or_buf='tips_new.csv')

In [None]:
# Order the rows from the highest to the lowest tip percentage.
tips2 = tips.sort_values('tip%', ascending=False)

# Top five rows; the original row labels are still attached at this point.
tips2.head(5)

In [None]:
# Renumber the sorted rows 0, 1, 2, ... and discard the old row labels
# instead of keeping them as a column.
tips2 = tips2.reset_index(drop=True)

tips2.head(5)

In [None]:
# Rows whose 'size' value equals 6.
tips.loc[tips['size'].eq(6)]

In [None]:
# (rows, columns) of the size-6 subset, followed by just its row count.
print(tips.loc[tips['size'].eq(6)].shape)
print(len(tips.loc[tips['size'].eq(6)]))

In [None]:
# Average tip for each party size from 1 to 6.
# The printed label promises the average *tip*, so the mean is taken over the
# 'tip' column (the earlier version averaged 'total_bill' by mistake).
for ppl in range(1, 7):
    # Single .loc[rows, column] lookup avoids chained indexing.
    avg_tip = tips.loc[tips['size'] == ppl, 'tip'].mean()
    print(f'Avg. tip from a table for {ppl} people = ${avg_tip:.2f}')

In [None]:
# Report the smallest and largest tip percentage, one per line.
print(
    f'Minimum tip% in total bill: {tips["tip%"].min()}%',
    f'Maximum tip% in total bill: {tips["tip%"].max()}%',
    sep='\n',
)

#### pandas.read_html(io, match='.+', flavor=None, header=None, index_col=None, skiprows=None, attrs=None, parse_dates=False, tupleize_cols=None, thousands=',', encoding=None, decimal='.', converters=None, na_values=None, keep_default_na=True, displayed_only=True)
Read HTML tables into a list of DataFrame objects.

In [None]:
# Page to scrape (requires network access when the cell runs).
url = 'https://en.wikichip.org/wiki/mediatek/helio'

# read_html returns one DataFrame per HTML table it finds on the page.
tables = pd.read_html(io=url)
print(f'Number of tables on the webpage: {len(tables)}')

# Preview the fourth table (index 3), which the following cell cleans up.
# NOTE(review): the index is tied to the current page layout; confirm it still
# points at the intended table if the page changes.
tables[3].head(5)

In [None]:
# Work on a copy so the scraped frame inside `tables` is left untouched.
table = tables[3].copy()

# Replace the scraped header with flat, single-level labels.
# NOTE(review): 'Cores' appears twice; confirm what the two columns hold before
# giving them distinct names.
table.columns = [
    'Model', 'Launched', 'Process', 'ISA', 'Cores', 'Cores',
    'CPU_Freq', 'Max Mem', 'Mem Type', 'Designer', 'Name', 'GPU_Freq',
]

# Remove the rows labelled 0, 1, 2 and 12 (presumably leftover header/footer
# rows from the HTML table - verify against the page), then renumber from 0.
table = table.drop(index=[0, 1, 2, 12]).reset_index(drop=True)

# For each measurement column keep only the text before the first unit marker
# and append a single unit suffix. 'Max Mem' is relabelled from GiB to GB.
table['Process'] = table['Process'].apply(lambda cell: f"{str(cell).split('nm')[0]}nm")
table['CPU_Freq'] = table['CPU_Freq'].apply(lambda cell: f"{str(cell).split('GHz')[0]}GHz")
table['Max Mem'] = table['Max Mem'].apply(lambda cell: f"{str(cell).split('GiB')[0]}GB")
table['GPU_Freq'] = table['GPU_Freq'].apply(lambda cell: f"{str(cell).split('MHz')[0]}MHz")

table

#### DataFrame.to_excel(excel_writer, sheet_name='Sheet1', na_rep='', float_format=None, columns=None, header=True, index=True, index_label=None, startrow=0, startcol=0, engine=None, merge_cells=True, encoding=None, inf_rep='inf', verbose=True, freeze_panes=None)
Write a DataFrame to an Excel sheet.

In [None]:
# Write the cleaned table to the 'Helio-X' sheet of Helio.xlsx.
# The writer is used as a context manager so the workbook is saved and the
# file handle closed when the block exits; without that the file is not
# reliably on disk when it is read back later.
with pd.ExcelWriter('Helio.xlsx') as excel_writer:
    # sheet_name is passed by keyword to keep the call explicit.
    table.to_excel(excel_writer, sheet_name='Helio-X')

#### pandas.read_excel(io, sheet_name=0, header=0, names=None, index_col=None, usecols=None, squeeze=False, dtype=None, engine=None, converters=None, true_values=None, false_values=None, skiprows=None, nrows=None, na_values=None, parse_dates=False, date_parser=None, thousands=None, comment=None, skipfooter=0, convert_float=True, **kwds)
Read an Excel table into a pandas DataFrame

In [None]:
# Read the 'Helio-X' sheet back from Helio.xlsx.
# ExcelFile is opened in a `with` block so its file handle is closed as soon
# as the sheet has been parsed instead of staying open for the whole session.
with pd.ExcelFile('Helio.xlsx') as xlsx:
    # index_col=0 consumes the row-number column that to_excel wrote first.
    df = pd.read_excel(xlsx, sheet_name='Helio-X', header=0, index_col=0)

# Look rows up by model name from here on.
df = df.set_index('Model')
df.tail()

In [None]:
# Look up single cells by (row label, column label) for the 'Helio X30' row.
print(df.loc['Helio X30', 'Process'])
print(df.loc['Helio X30', 'Launched'])

In [None]:
def month2num(x):
    """Convert a 'D Month YYYY' date string into 'M/D/YYYY'.

    Parameters
    ----------
    x : str
        Date written as day, full English month name and year, separated by
        whitespace, e.g. '26 October 2016'.

    Returns
    -------
    str
        The date as 'month/day/year', with the month replaced by its number
        (1-12) and the day and year text kept as given, e.g. '10/26/2016'.

    Raises
    ------
    ValueError
        If x does not split into exactly three parts, or the month is not one
        of the twelve full English month names.
    """
    # split() without an argument tolerates repeated or non-standard
    # whitespace between the three parts.
    day, month, year = x.split()
    months = ['January', 'February', 'March', 'April', 'May', 'June', 'July',
              'August', 'September', 'October', 'November', 'December']
    # list.index is zero-based, month numbers are one-based.
    return '/'.join([str(months.index(month) + 1), day, year])
    
# Rewrite every launch date with month2num.
# Run once per freshly loaded df: the converted values no longer match the
# input format month2num expects.
df['Launched'] = df['Launched'].map(month2num)
df