# Comparison
[Comparison with spreadsheets](https://pandas.pydata.org/docs/getting_started/comparison/comparison_with_spreadsheets.html)
Since many potential pandas users have some familiarity with spreadsheet programs like Excel, this page is meant to provide some examples of how various spreadsheet operations would be performed using pandas. This page uses Excel terminology and links to the Excel documentation, but much of it is the same or similar in Google Sheets, LibreOffice Calc, Apple Numbers, and other Excel-compatible spreadsheet software.

In [11]:
import pandas as pd
import numpy as np


# Load the comma-separated tips file into a DataFrame.
tips = pd.read_csv(
    'data/tips.csv',
    sep=',',
)

# Operations on Columns / Formulas in Spreadsheets
# An operation on a column is applied to every row at once.
tips['total_bill'] = tips['total_bill'].sub(2)  # take 2 off every bill
tips['new_bill'] = tips['total_bill'].div(2)  # half of the already-reduced bill

# Filter
# tips = tips[tips['new_bill'] > 10]
# is_dinner = tips['time'] == 'Dinner'
# tips = tips[is_dinner]
# print(tips['new_bill'].count())

# Conditional Column Creation / IF Statements in Spreadsheets e.g. IF(A1 < 10, "low", "high")
# np.where picks 'low' where the condition holds and 'high' everywhere else.
tips['bucket'] = np.where(tips['total_bill'].lt(10), 'low', 'high')
# Boolean mask selecting the rows that landed in the 'high' bucket.
filter_bucket = tips['bucket'].eq('high')
# tips = tips[filter_bucket]

# Date Functions / Date and Time Functions in Spreadsheets
# Assigning a single Timestamp to a column gives every row that same value.
tips['date1'] = pd.Timestamp('2013-01-15')
tips['date2'] = pd.Timestamp('2015-02-15')

# Individual calendar fields are read through the .dt accessor.
tips['date1_year'] = tips['date1'].dt.year
tips['date2_month'] = tips['date2'].dt.month

# Shift date1 by one MonthBegin offset.
tips['date1_next'] = tips['date1'] + pd.offsets.MonthBegin(n=1)

# Convert both dates to monthly periods, then subtract them.
tips['months_between'] = (
    tips['date2'].dt.to_period(freq='M')
    - tips['date1'].dt.to_period(freq='M')
)

# Selecting Columns / Selecting Cells in Spreadsheets
# The commented-out statements below are optional examples; uncomment one to try it.
# tips[['total_bill', 'tip']]  # Select multiple columns
# tips.drop("sex", axis=1, inplace=True)  # Drop a column
# tips.rename(columns={'total_bill': 'total_bill2'}, inplace=True)  # Rename a column

# tips = tips.sort_values(['sex'], ascending=True)  # Sort by a column

# print(tips['time'].str.len())  # String length of column
# The search result is not assigned to anything; it is left as a bare final
# expression. Per the pandas docs, str.find returns an integer index per row.
tips['sex'].str.find('ale') # String search / FIND() in Spreadsheets
# print(tips)




0      3
1      1
2      1
3      1
4      3
      ..
239    1
240    3
241    1
242    1
243    3
Name: sex, Length: 244, dtype: int64