# Default imports

In [None]:
import numpy as np
import pandas as pd

# Show more rows by default

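When a DataFrame has more rows than `max_rows`, pandas truncates the output to `min_rows` rows, so `min_rows` must be raised too if we want to see more rows in the truncated view.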

- [Docs](https://pandas.pydata.org/pandas-docs/stable/user_guide/options.html)
- [Stack Overflow answer](https://stackoverflow.com/a/57861411/61109)


In [None]:
# Frames longer than this are truncated when displayed (default 60, None means all).
pd.options.display.max_rows = 100
# Number of rows shown once a frame has been truncated (default 10).
pd.options.display.min_rows = 50

# Show more/fewer columns
[More info](https://pandas.pydata.org/pandas-docs/stable/user_guide/options.html#frequently-used-options)

In [None]:
# Upper bound on the number of columns pandas prints before eliding the rest.
pd.options.display.max_columns = 15

# Don't use scientific notation
[More info](https://pandas.pydata.org/pandas-docs/stable/user_guide/options.html?highlight=float_format)

In [None]:
# Render every float with two fixed decimals; values below 0.005 therefore show as 0.00.
pd.set_option('display.float_format', '{:.2f}'.format)

# Load datatables extension
[More info](https://colab.research.google.com/notebooks/data_table.ipynb#scrollTo=oEQUmjXkHZz-)

In [None]:
# Load the data_table IPython extension from the google.colab package.
# NOTE(review): presumably only importable on a Colab runtime - confirm before running elsewhere.
%load_ext google.colab.data_table

# Show value of last line
Even if it's an assignment.

⚠️ [This does not work on Google Colab](https://stackoverflow.com/questions/62229579/google-collab-how-to-show-value-of-assignments)

[More info.](https://ipython.readthedocs.io/en/stable/config/options/terminal.html#configtrait-InteractiveShell.ast_node_interactivity)

In [None]:
from IPython.core.interactiveshell import InteractiveShell
# "last_expr_or_assign": echo the value of a cell's last statement even when
# that statement is an assignment. The attribute is set on the class itself,
# not on a particular shell instance.
InteractiveShell.ast_node_interactivity = "last_expr_or_assign"

# Connect to Google Drive

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive.
drive.mount('/content/drive')

# Cashier dataset

Randomized dataset with columns:

- datetime
- total_amount
- number_of_products
- cashier
- store
- city
- discount_card_used

In [None]:
import pandas as pd
import random
from datetime import datetime, timedelta

# Pool of cashier first names; get_cashier picks one entry at random per row.
first_names = [
    "Maria Carmen",
    "Maria",
    "Carmen",
    "Josefa",
    "Isabel",
    "Ana Maria",
    "Maria Dolores",
    "Maria Pilar",
    "Maria Teresa",
    "Ana",
    "Laura",
    "Francisca",
    "Antonia",
    "Dolores",
    "Maria Angeles",
    "Cristina",
    "Marta",
    "Maria Jose",
    "Maria Isabel",
    "Pilar",
    "Maria Luisa",
    "Lucia",
    "Concepcion",
    "Elena",
    "Mercedes",
    "Manuela",
    "Rosa Maria",
    "Raquel",
    "Sara",
    "Maria Jesus",
    "Paula",
    "Juana",
    "Teresa",
    "Rosario",
    "Encarnacion",
    "Beatriz",
    "Rosa",
    "Nuria",
    "Silvia",
    "Montserrat",
    "Julia",
    "Patricia",
    "Irene",
    "Monica",
    "Andrea",
    "Rocio",
    "Angela",
    "Maria Mar",
    "Margarita",
    "Sonia",
    "Sandra",
    "Susana",
    "Alicia",
    "Yolanda",
    "Alba",
    "Maria Josefa",
    "Marina",
    "Natalia",
    "Maria Rosario",
    "Inmaculada",
    "Angeles",
    "Esther",
    "Maria Mercedes",
    "Ana Isabel",
    "Eva",
    "Veronica",
    "Amparo",
    "Noelia",
    "Maria Rosa",
    "Maria Victoria",
    "Maria Concepcion",
    "Carolina",
    "Claudia",
    "Eva Maria",
    "Catalina",
    "Consuelo",
    "Victoria",
    "Lorena",
    "Ana Belen",
    "Maria Antonia",
    "Maria Elena",
    "Miriam",
    "Emilia",
    "Nerea",
    "Luisa",
    "Ines",
    "Maria Nieves",
    "Gloria",
    "Lidia",
    "Carla",
    "Aurora",
    "Esperanza",
    "Josefina",
    "Sofia",
    "Milagros",
    "Olga",
    "Celia",
    "Maria Soledad",
    "Purificacion",
]

# Pool of store names; get_store_name picks one entry at random per row.
store_names = [
    "Situwala",
    "Yakkha",
    "Gnawale",
    "Buhyo",
    "Kutal",
    "Bindukar",
    "Upeti",
    "Mana",
    "Badhyo",
    "Barme",
    "Dhnaju",
    "Kami",
    "Baidhaya",
    "Ma",
    "Ghimere",
    "Sangami",
    "Ghotane",
    "Kewat",
    "Singtan",
    "Chitrakr",
    "Khwaonjoo",
    "Manjhi",
]

# Pool of city names; get_city picks one entry at random per row.
city_names = [
    "Távora",
    "Cataguases",
    "Médici",
    "Bressane",
    "Onça",
    "Brígida",
    "Parati",
    "Mônica",
    "Pantano",
    "Anastácio",
    "Ostras",
    "Jequitibá",
]


def get_datetime_string(min_year=1980, max_year=datetime.now().year):
    """Return a random ``datetime`` between the start of ``min_year`` and the end of ``max_year``.

    Despite the name, the result is a ``datetime`` object, not a string; its
    default string form is ``yyyy-mm-dd hh:mm:ss.ffffff``.

    Args:
        min_year: First calendar year of the range (inclusive).
        max_year: Last calendar year of the range (inclusive). The default is
            the current year, evaluated once when the function is defined.

    Returns:
        A ``datetime`` between ``datetime(min_year, 1, 1)`` and
        ``datetime(max_year + 1, 1, 1)``.

    Raises:
        ValueError: If ``max_year`` is earlier than ``min_year``.
    """
    if max_year < min_year:
        raise ValueError(
            f"max_year ({max_year}) must not be earlier than min_year ({min_year})"
        )
    start = datetime(min_year, 1, 1)
    # Use the real calendar boundary: counting 365 days per year drops one day
    # per leap year, which made the last days of the range unreachable.
    end = datetime(max_year + 1, 1, 1)
    return start + (end - start) * random.random()


def get_total_amount():
    """Return a random amount between 0 and 500, rounded to two decimals."""
    return round(random.random() * 500, 2)


# Distributed evenly, not realistic.
def get_number_of_products():
    """Return a random integer between 0 and 200, both ends included."""
    return random.randint(0, 200)

def get_cashier():
    """Return one entry of ``first_names`` chosen at random."""
    return random.choice(first_names)


def get_store_name():
    """Return one entry of ``store_names`` chosen at random."""
    return random.choice(store_names)


def get_city():
    """Return one entry of ``city_names`` chosen at random."""
    return random.choice(city_names)

def get_discount_card():
    """Return ``True`` when a fresh ``random.random()`` draw exceeds 0.5, else ``False``."""
    return random.random() > 0.5


def get_random_data(fn, n):
    """Call the zero-argument callable ``fn`` ``n`` times and return the results as a list."""
    values = []
    for _ in range(n):
        values.append(fn())
    return values


num_rows = 200

# Each column is filled by its own generator; the order below is the column
# order of the resulting frame and the order in which random values are drawn.
data = {
    column: get_random_data(generator, num_rows)
    for column, generator in (
        ("datetime", get_datetime_string),
        ("total_amount", get_total_amount),
        ("number_of_products", get_number_of_products),
        ("cashier", get_cashier),
        ("store", get_store_name),
        ("city", get_city),
        ("discount_card_used", get_discount_card),
    )
}

df = pd.DataFrame(data)

# Dataset: large and small numbers

Randomly generated dataset with very small and very large numbers.

In [4]:
import pandas as pd
import random

def get_small_number():
    """Return a random float: ``random.random()`` divided by 10,000,000."""
    return random.random() / 10_000_000


def get_large_number():
    """Return a random float: ``random.random()`` multiplied by 10,000,000."""
    return random.random() * 10_000_000

def get_random_data(fn, n):
    """Build a list of ``n`` values, each produced by a fresh call to ``fn()``."""
    return list(fn() for _ in range(n))


num_rows = 600

# Two float columns at very different magnitudes, to exercise float display settings.
data = dict(
    hours=get_random_data(get_small_number, num_rows),
    kilometers=get_random_data(get_large_number, num_rows),
)

df = pd.DataFrame(data)
df

Unnamed: 0,hours,kilometers
0,2.659801e-08,7.766627e+06
1,5.271829e-08,3.024710e+06
2,9.717906e-08,3.402307e+06
3,3.485674e-08,7.323914e+06
4,5.638550e-09,7.569620e+06
...,...,...
595,8.025507e-08,6.835332e+06
596,7.493424e-08,9.796151e+06
597,3.332021e-08,3.779437e+06
598,1.872613e-08,1.646130e+06


# Set precision

This is an IPython setting that controls how floats are displayed outside of pandas data structures.

[Docs](https://ipython.readthedocs.io/en/stable/interactive/magics.html#magic-precision)

Reset to the default by calling `%precision` with no value.

In [6]:
# Display floats with six decimal places in IPython output.
%precision 6
1/3

0.333333