# Input and Output

In [1]:
import pandas as pd

  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


In [3]:
# Load the baby-names dataset directly from the City of New York data URL.
url = "https://data.cityofnewyork.us/api/views/25th-nujf/rows.csv"
baby_names = pd.read_csv(filepath_or_buffer=url)
# Preview the first five rows to confirm the load.
baby_names.head(n=5)

Unnamed: 0,Year of Birth,Gender,Ethnicity,Child's First Name,Count,Rank
0,2011,FEMALE,HISPANIC,GERALDINE,13,75
1,2011,FEMALE,HISPANIC,GIA,21,67
2,2011,FEMALE,HISPANIC,GIANNA,49,42
3,2011,FEMALE,HISPANIC,GISELLE,38,51
4,2011,FEMALE,HISPANIC,GRACE,36,53


## Export DataFrame to CSV File
- The `to_csv` method exports a **DataFrame** to a CSV file.
- Its first argument is the filename.
- By default, pandas will include the index. Set the `index` parameter to False to exclude the index.
- The `columns` parameter limits the exported columns.

In [None]:
# Called without a path, to_csv returns the CSV text as a string instead of writing a file.
baby_names.to_csv(path_or_buf=None)

In [6]:
# Default export: the row index is written to the file as an extra leading column.
baby_names.to_csv(path_or_buf='baby_names.csv')

In [7]:
# Export to CSV while leaving the row index out of the file.
baby_names.to_csv(path_or_buf='baby_names.csv', index=False)

In [8]:
# Export only the listed columns, omitting the row index.
baby_names.to_csv(
    path_or_buf='baby_names.csv',
    columns=['Year of Birth', "Child's First Name", 'Count'],
    index=False,
)

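## Install openpyxl Library to Read and Write Excel Files
- pandas relies on the third-party `openpyxl` package to read and write `.xlsx` workbooks. Install it in the notebook's environment (for example, `pip install openpyxl`) before running the Excel cells below.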

## Import Excel File into pandas
- The `read_excel` function reads an Excel file/workbook into a **DataFrame**.
- Use the `sheet_name` parameter if the workbook contains multiple worksheets. Pass a single worksheet name or index position (e.g. `'Data 1'` or `0`), or a list of names/index positions to read several worksheets at once.
- Pass `sheet_name=None` to read all worksheets.
- Pandas will store multiple worksheets in a Python dictionary. The keys will be the worksheet names, and the values will be the **DataFrames**.

In [9]:
# Load the workbook into a DataFrame; with no sheet_name given, the first worksheet is read.
pd.read_excel(io='Data - Single Worksheet.xlsx')

Unnamed: 0,First Name,Last Name,City,Gender
0,Brandon,James,Miami,M
1,Sean,Hawkins,Denver,M
2,Judy,Day,Los Angeles,F
3,Ashley,Ruiz,San Francisco,F
4,Stephanie,Gomez,Portland,F


In [15]:
# Three equivalent ways to load the first worksheet; the notebook renders only
# the result of the final expression.
pd.read_excel(io='Data - Multiple Worksheets.xlsx')                       # default: first worksheet
pd.read_excel(io='Data - Multiple Worksheets.xlsx', sheet_name='Data 1')  # by worksheet name
pd.read_excel(io='Data - Multiple Worksheets.xlsx', sheet_name=0)         # by zero-based position

Unnamed: 0,First Name,Last Name,City,Gender
0,Brandon,James,Miami,M
1,Sean,Hawkins,Denver,M
2,Judy,Day,Los Angeles,F
3,Ashley,Ruiz,San Francisco,F
4,Stephanie,Gomez,Portland,F


In [16]:
# Two equivalent ways to load the second worksheet; only the final expression
# is rendered as the cell output.
pd.read_excel(io='Data - Multiple Worksheets.xlsx', sheet_name='Data 2')  # by worksheet name
pd.read_excel(io='Data - Multiple Worksheets.xlsx', sheet_name=1)         # by zero-based position

Unnamed: 0,First Name,Last Name,City,Gender
0,Parker,Power,Raleigh,F
1,Preston,Prescott,Philadelphia,F
2,Ronaldo,Donaldo,Bangor,M
3,Megan,Stiller,San Francisco,M
4,Bustin,Jieber,Austin,F


In [20]:
# Each call loads several worksheets at once and returns them as a dictionary
# of DataFrames; only the last call's result is displayed.
pd.read_excel(io='Data - Multiple Worksheets.xlsx', sheet_name=['Data 1', 'Data 2'])  # list of names
pd.read_excel(io='Data - Multiple Worksheets.xlsx', sheet_name=[0, 1])                # list of positions
pd.read_excel(io='Data - Multiple Worksheets.xlsx', sheet_name=None)                  # every worksheet

{'Data 1':   First Name Last Name           City Gender
 0    Brandon     James          Miami      M
 1       Sean   Hawkins         Denver      M
 2       Judy       Day    Los Angeles      F
 3     Ashley      Ruiz  San Francisco      F
 4  Stephanie     Gomez       Portland      F,
 'Data 2':   First Name Last Name           City Gender
 0     Parker     Power        Raleigh      F
 1    Preston  Prescott   Philadelphia      F
 2    Ronaldo   Donaldo         Bangor      M
 3      Megan   Stiller  San Francisco      M
 4     Bustin    Jieber         Austin      F}

In [21]:
# Keep the dictionary of all worksheets, then pull one DataFrame out by its worksheet name.
data = pd.read_excel(io='Data - Multiple Worksheets.xlsx', sheet_name=None)
data['Data 1']

Unnamed: 0,First Name,Last Name,City,Gender
0,Brandon,James,Miami,M
1,Sean,Hawkins,Denver,M
2,Judy,Day,Los Angeles,F
3,Ashley,Ruiz,San Francisco,F
4,Stephanie,Gomez,Portland,F


In [23]:
# Look up the 'Data 2' worksheet's DataFrame in the dictionary returned by read_excel(sheet_name=None).
data['Data 2']

Unnamed: 0,First Name,Last Name,City,Gender
0,Parker,Power,Raleigh,F
1,Preston,Prescott,Philadelphia,F
2,Ronaldo,Donaldo,Bangor,M
3,Megan,Stiller,San Francisco,M
4,Bustin,Jieber,Austin,F


## Export Excel File from pandas
- The **ExcelWriter** class writes one or more **DataFrames** to an Excel file.
- Use a context manager (the `with` keyword) to create the **ExcelWriter** object and bind it to a variable, e.g. `with pd.ExcelWriter('file.xlsx') as excel_file:`.
- Invoke the `to_excel` method on every **DataFrame** to include in the Excel workbook and pass in the **ExcelWriter** object as the first argument.
- The `to_excel` method supports `sheet_name`, `index`, and `columns` parameters.

In [30]:
# Boolean-mask filter: keep only the rows whose Gender is 'FEMALE'.
baby_names.loc[baby_names['Gender'].eq('FEMALE')]

Unnamed: 0,Year of Birth,Gender,Ethnicity,Child's First Name,Count,Rank
0,2011,FEMALE,HISPANIC,GERALDINE,13,75
1,2011,FEMALE,HISPANIC,GIA,21,67
2,2011,FEMALE,HISPANIC,GIANNA,49,42
3,2011,FEMALE,HISPANIC,GISELLE,38,51
4,2011,FEMALE,HISPANIC,GRACE,36,53
...,...,...,...,...,...,...
56572,2014,FEMALE,WHITE NON HISPANIC,Yocheved,28,68
56573,2014,FEMALE,WHITE NON HISPANIC,Zelda,12,84
56574,2014,FEMALE,WHITE NON HISPANIC,Zissy,25,71
56575,2014,FEMALE,WHITE NON HISPANIC,Zoe,96,21


In [27]:
# Split the dataset into one DataFrame per gender.
females = baby_names.loc[baby_names['Gender'].eq('FEMALE')]
males = baby_names.loc[baby_names['Gender'].eq('MALE')]

In [28]:
# Write both DataFrames into one workbook, one worksheet each; the context
# manager saves and closes the file when the block exits.
with pd.ExcelWriter(path='NYC Baby Data.xlsx') as excel_file:
    # 'Female' sheet: every column, no index column.
    females.to_excel(excel_writer=excel_file, sheet_name='Female', index=False)
    # 'Male' sheet: only the three listed columns, no index column.
    males.to_excel(
        excel_writer=excel_file,
        sheet_name='Male',
        columns=['Year of Birth', "Child's First Name", 'Rank'],
        index=False,
    )

In [31]:
# Read the 'Female' worksheet back from the exported workbook to check the export.
pd.read_excel(io='NYC Baby Data.xlsx', sheet_name='Female')

Unnamed: 0,Year of Birth,Gender,Ethnicity,Child's First Name,Count,Rank
0,2011,FEMALE,HISPANIC,GERALDINE,13,75
1,2011,FEMALE,HISPANIC,GIA,21,67
2,2011,FEMALE,HISPANIC,GIANNA,49,42
3,2011,FEMALE,HISPANIC,GISELLE,38,51
4,2011,FEMALE,HISPANIC,GRACE,36,53
...,...,...,...,...,...,...
29397,2014,FEMALE,WHITE NON HISPANIC,Yocheved,28,68
29398,2014,FEMALE,WHITE NON HISPANIC,Zelda,12,84
29399,2014,FEMALE,WHITE NON HISPANIC,Zissy,25,71
29400,2014,FEMALE,WHITE NON HISPANIC,Zoe,96,21


In [33]:
# Read the 'Male' worksheet back; it holds only the columns chosen at export time.
pd.read_excel(io='NYC Baby Data.xlsx', sheet_name='Male')

Unnamed: 0,Year of Birth,Child's First Name,Rank
0,2013,Jared,80
1,2013,Jariel,80
2,2011,AARAV,51
3,2011,AARON,19
4,2011,ABDUL,46
...,...,...,...
28175,2014,Yousef,94
28176,2014,Youssef,88
28177,2014,Yusuf,96
28178,2014,Zachary,39


In [34]:
# Read every worksheet of the exported workbook into a dictionary of DataFrames.
pd.read_excel(io='NYC Baby Data.xlsx', sheet_name=None)

{'Female':        Year of Birth  Gender           Ethnicity Child's First Name  Count  \
 0               2011  FEMALE            HISPANIC          GERALDINE     13   
 1               2011  FEMALE            HISPANIC                GIA     21   
 2               2011  FEMALE            HISPANIC             GIANNA     49   
 3               2011  FEMALE            HISPANIC            GISELLE     38   
 4               2011  FEMALE            HISPANIC              GRACE     36   
 ...              ...     ...                 ...                ...    ...   
 29397           2014  FEMALE  WHITE NON HISPANIC           Yocheved     28   
 29398           2014  FEMALE  WHITE NON HISPANIC              Zelda     12   
 29399           2014  FEMALE  WHITE NON HISPANIC              Zissy     25   
 29400           2014  FEMALE  WHITE NON HISPANIC                Zoe     96   
 29401           2014  FEMALE  WHITE NON HISPANIC               Zoey     26   
 
        Rank  
 0        75  
 1        