In [1]:
import pandas as pd
import os

In [2]:
# Read the complete multi-year survey table from data/surveys.csv
surveys_df = pd.read_csv(filepath_or_buffer="data/surveys.csv")

In [3]:
# Show the animal names on a single line, separated by a comma and a space
animals = ['lion', 'tiger', 'crocodile', 'vulture', 'hippo']
print(*animals, sep=', ')

lion, tiger, crocodile, vulture, hippo


In [4]:
# Write one CSV per distinct survey year, keeping only rows without missing values
for year in surveys_df['year'].unique():
    rows_for_year = surveys_df['year'] == year
    complete_rows = surveys_df.loc[rows_for_year].dropna()

    # Each file is named tmp/surveys<year>.csv
    out_path = ''.join(['tmp/surveys', str(year), '.csv'])
    complete_rows.to_csv(out_path)

In [5]:
def one_year_csv_writer(this_year, all_data, folder_to_save, root_name):
    """
    Save the rows belonging to a single year as a csv file.

    The file is written to ``<folder_to_save>/<root_name><this_year>.csv``.

    Parameters
    ----------
    this_year : int
        year whose rows are written out
    all_data : pd.DataFrame
        DataFrame with multi-year data; filtered on its ``year`` column
    folder_to_save : str
        folder that receives the csv file
    root_name : str
        leading part of the file name, placed directly before the year
    """

    # Boolean mask marking the rows recorded in the requested year
    in_requested_year = all_data.year == this_year

    # Assemble <root_name><this_year>.csv and place it inside the target folder
    csv_name = root_name + str(this_year) + '.csv'
    destination = os.path.join(folder_to_save, csv_name)

    all_data[in_requested_year].to_csv(destination)

# Export every year from 1978 through 2000 (inclusive) into the tmp folder
start_year, end_year = 1978, 2000
directory_name, root_file_name = 'tmp', 'different_file_name'
for year in range(start_year, end_year + 1):
    one_year_csv_writer(year, surveys_df, directory_name, root_file_name)

In [6]:
def one_year_csv_writer(this_year, all_data):
    """
    Save the rows belonging to a single year as a csv file.

    this_year -- year whose rows are written out
    all_data -- DataFrame with multi-year data (filtered on its ``year`` column)

    Returns the path of the file that was written, which has the form
    ``tmp/function_surveys<this_year>.csv``.
    """

    # Boolean mask marking the rows recorded in the requested year
    in_requested_year = all_data.year == this_year

    # The output location is fixed: the tmp folder with a function_surveys prefix
    destination = ''.join(['tmp/function_surveys', str(this_year), '.csv'])
    all_data[in_requested_year].to_csv(destination)
    return destination

def yearly_data_csv_writer(start_year, end_year, all_data):
    """
    Write one CSV file per year and collect what the per-year writer returns.

    start_year -- the first year of data we want
    end_year -- the last year of data we want (included)
    all_data -- DataFrame with multi-year data

    Returns a list holding the return value of one_year_csv_writer for each
    year, in ascending year order.
    """

    # range() excludes its stop value, so stop one past end_year to include it
    return [one_year_csv_writer(year, all_data)
            for year in range(start_year, end_year + 1)]

# Write the files for 2000 and 2001, then display what the writer returned
written_files = yearly_data_csv_writer(2000, 2001, surveys_df)
print(written_files)

['tmp/function_surveys2000.csv', 'tmp/function_surveys2001.csv']


In [7]:
def one_year_csv_writer(this_year, all_data, folder_to_save='./', root_name='survey'):
    """
    Save the rows belonging to a single year as a csv file.

    The file is written to ``<folder_to_save>/<root_name><this_year>.csv``.

    Parameters
    ----------
    this_year : int
        year whose rows are written out
    all_data : pd.DataFrame
        DataFrame with multi-year data; filtered on its ``year`` column
    folder_to_save : str
        folder that receives the csv file (defaults to ``'./'``)
    root_name : str
        leading part of the file name (defaults to ``'survey'``)
    """

    # Keep only the rows recorded in the requested year
    year_mask = all_data.year == this_year
    selected_rows = all_data[year_mask]

    # File name is the root followed by the year and the .csv extension
    csv_name = root_name + str(this_year) + '.csv'
    selected_rows.to_csv(os.path.join(folder_to_save, csv_name))

def yearly_data_csv_writer(all_data, start_year=None, end_year=None):
    """
    Writes separate CSV files for each year of data.

    all_data -- DataFrame with multi-year data (its ``year`` column is used)
    start_year -- the first year of data we want; when None, the smallest
                  value in the ``year`` column is used
    end_year -- the last year of data we want; when None, the largest
                value in the ``year`` column is used
    """
    # Series.min()/max() ignore missing values, unlike the builtins applied to
    # a Series. The int() conversion keeps range() working when the year
    # column is stored as float (which pandas does when it contains NaN).
    if start_year is None:
        start_year = int(all_data.year.min())
    if end_year is None:
        end_year = int(all_data.year.max())
    # "end_year" is the last year of data we want to pull, so we loop to end_year+1
    for year in range(start_year, end_year + 1):
        one_year_csv_writer(year, all_data)

def one_year_csv_writer(this_year, all_data, folder_to_save='./', root_name='survey'):
    """
    Writes a csv file for data from a given year.

    When all_data holds no rows for this_year, a message is printed and no
    file is created.

    Parameters
    ----------
    this_year : int
        year for which data is extracted
    all_data : pd.DataFrame
        DataFrame with multi-year data
    folder_to_save : str
        folder to save the data files
    root_name : str
        root of the filenames to save the data
    """

    # Select data for the year
    surveys_year = all_data[all_data.year == this_year]
    if surveys_year.empty:
        # this_year is an int, so convert it before concatenating; adding it
        # to a str directly raises TypeError
        print('no data available for ' + str(this_year) + ', output file not created')
        return

    # Write the new DataFrame to a csv file
    filename = os.path.join(folder_to_save, ''.join([root_name, str(this_year), '.csv']))
    surveys_year.to_csv(filename)

def one_year_csv_writer(this_year, all_data, folder_to_save='./', root_name='survey'):
    """
    Writes a csv file for data from a given year.

    When all_data holds no rows for this_year, a message is printed and no
    file is created. Otherwise folder_to_save is created if it does not exist
    yet and the file ``<root_name><this_year>.csv`` is written into it.

    Parameters
    ----------
    this_year : int
        year for which data is extracted
    all_data : pd.DataFrame
        DataFrame with multi-year data
    folder_to_save : str
        folder to save the data files
    root_name : str
        root of the filenames to save the data

    Returns
    -------
    str or None
        path of the file that was written, or None when no file was created
    """

    # Select data for the year
    surveys_year = all_data[all_data.year == this_year]
    if surveys_year.empty:
        # this_year is an int, so convert it before concatenating; adding it
        # to a str directly raises TypeError
        print('no data available for ' + str(this_year) + ', output file not created')
        return None

    # Test the path itself rather than searching os.listdir('.'): the listing
    # never contains './', nested paths or absolute paths, so the old check
    # tried to re-create folders that already exist.
    if folder_to_save and not os.path.isdir(folder_to_save):
        os.makedirs(folder_to_save, exist_ok=True)
        print('Processed directory created')

    # Write the new DataFrame to a csv file
    filename = os.path.join(folder_to_save, ''.join([root_name, str(this_year), '.csv']))
    surveys_year.to_csv(filename)
    return filename

def yearly_data_csv_writer(all_data, yearcolumn="year",
                           folder_to_save='./', root_name='survey'):
    """
    Write one csv file for every distinct year found in the data.

    all_data --- DataFrame with multi-year data
    yearcolumn --- column name containing the year of the data
    folder_to_save --- folder name to store files
    root_name --- start of the file names stored

    Returns a list holding the return value of one_year_csv_writer for each
    distinct year, in the order the years first appear in the column.
    """
    # NOTE(review): yearcolumn only controls which years are discovered here;
    # one_year_csv_writer as defined in this file filters on all_data.year.
    distinct_years = all_data[yearcolumn].unique()
    return [one_year_csv_writer(year, all_data, folder_to_save, root_name)
            for year in distinct_years]