In [1]:
import os
import pandas as pd
import jupyter_black

# Activate the jupyter_black extension so code cells are auto-formatted with Black.
jupyter_black.load()

# Base directory for every relative path used in this notebook.
# Notebooks do not define `__file__`; the previous expression passed the literal
# string "__file__" to abspath(), which merely resolved to the working directory.
# Ask for the working directory explicitly so the intent is unambiguous.
current_dir = os.getcwd()

In [2]:
# Every input/output file used below sits in the "data" folder under current_dir.
data_dir = os.path.join(current_dir, "data")

# CSV / XLSX targets written by the export cells.
babynames_path = os.path.join(data_dir, "babynames.csv")
babynames_path_xlsx = os.path.join(data_dir, "babynames.xlsx")

# Sample workbooks read by the Excel cells.
single_path = os.path.join(data_dir, "Data - Single Worksheet.xlsx")
multiple_path = os.path.join(data_dir, "Data - Multiple Worksheets.xlsx")

### Pass a URL to the `pd.read_csv()` Method

In [3]:
# CSV export endpoint hosted on data.cityofnewyork.us.
url = "https://data.cityofnewyork.us/api/views/25th-nujf/rows.csv?accessType=DOWNLOAD"

# read_csv accepts a URL directly: the file is fetched and parsed in one step.
babynames = pd.read_csv(filepath_or_buffer=url)

In [4]:
# Print a structural summary of the frame: index range, columns, non-null counts,
# dtypes and approximate memory usage.
babynames.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49509 entries, 0 to 49508
Data columns (total 6 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   Year of Birth       49509 non-null  int64 
 1   Gender              49509 non-null  object
 2   Ethnicity           49509 non-null  object
 3   Child's First Name  49509 non-null  object
 4   Count               49509 non-null  int64 
 5   Rank                49509 non-null  int64 
dtypes: int64(3), object(3)
memory usage: 2.3+ MB


### Quick Object Conversion

In [5]:
first_names = babynames["Child's First Name"]

# Quick conversions of the Series into other objects. These results are not
# assigned, and only the last expression of the cell is displayed.
first_names.to_frame()
first_names.tolist()
first_names.to_dict()
first_names.str.title()

# Title-case, de-duplicate and sort the names, then preview the first
# 50 characters of the comma-separated string.
unique_sorted_names = first_names.str.title().drop_duplicates().sort_values()
", ".join(unique_sorted_names)[:50]

'Aahil, Aaliyah, Aarav, Aaron, Aarya, Aaryan, Aayan'

In [6]:
# Export only these two columns; the row index is left out of the file.
export_columns = ["Gender", "Ethnicity"]

babynames.to_csv(
    path_or_buf=babynames_path,
    columns=export_columns,
    index=False,
    encoding="utf-8",
)

### Excel

In [7]:
# Read the workbook; with no sheet_name given, pandas loads the first worksheet.
pd.read_excel(single_path)

Unnamed: 0,First Name,Last Name,City,Gender
0,Brandon,James,Miami,M
1,Sean,Hawkins,Denver,M
2,Judy,Day,Los Angeles,F
3,Ashley,Ruiz,San Francisco,F
4,Stephanie,Gomez,Portland,F


In [8]:
# sheet_name=0 selects the first worksheet by zero-based position.
pd.read_excel(multiple_path, sheet_name=0)

Unnamed: 0,First Name,Last Name,City,Gender
0,Brandon,James,Miami,M
1,Sean,Hawkins,Denver,M
2,Judy,Day,Los Angeles,F
3,Ashley,Ruiz,San Francisco,F
4,Stephanie,Gomez,Portland,F


In [9]:
# Different ways to pick worksheets with `sheet_name`. Only the final expression
# of the cell is displayed; the earlier calls are shown for reference.

# An integer selects a worksheet by zero-based position...
pd.read_excel(multiple_path, sheet_name=0)
pd.read_excel(multiple_path, sheet_name=1)
# ...and a string selects it by its name.
pd.read_excel(multiple_path, sheet_name="Data 1")

# A list returns a dict of DataFrames keyed by the requested entries.
result = pd.read_excel(multiple_path, sheet_name=[0, 1])
result[0]
result[1]

# None loads every worksheet into a dict keyed by sheet name.
result = pd.read_excel(multiple_path, sheet_name=None)
result["Data 1"]
result["Data 2"]

Unnamed: 0,First Name,Last Name,City,Gender
0,Parker,Power,Raleigh,F
1,Preston,Prescott,Philadelphia,F
2,Ronaldo,Donaldo,Bangor,M
3,Megan,Stiller,San Francisco,M
4,Bustin,Jieber,Austin,F


### Export Excel File with the `to_excel()` Method

In [10]:
# Split the names by gender; each subset is written to its own worksheet.
girls = babynames[babynames["Gender"] == "FEMALE"]
boys = babynames[babynames["Gender"] == "MALE"]

# Use the writer as a context manager: the workbook is saved and the file handle
# closed when the block exits, even if one of the writes raises. The explicit
# `ExcelWriter.save()` call is deprecated and no longer exists in pandas 2.0+.
with pd.ExcelWriter(babynames_path_xlsx) as excel_file:
    girls.to_excel(excel_file, sheet_name="girls", index=False)
    # The "boys" sheet only gets a subset of the columns.
    boys.to_excel(
        excel_file,
        sheet_name="boys",
        index=False,
        columns=["Child's First Name", "Ethnicity"],
    )

  excel_file.save()
