In [1]:
import pandas as pd

In [59]:
def parse_str(s, separator, quote):
    quote_pos = []
    for i in range(len(s)):
        if s[i] == quote:
            quote_pos.append(i)

    if len(quote_pos) % 2 != 0:
        raise Exception('Quoted string is malformed.')

    i = 0
    parse_output = []
    current_pos = 0
    has_qoute = True
    while current_pos < len(s):
        if i == len(quote_pos):
            has_qoute = False
            start = len(s)
            end = len(s)
        else:
            start = quote_pos[i]
            # add one for separator
            end = quote_pos[i+1] + 1
        # Get string from current_pos up to the first quote
        # The start and end of a non_quote_str must be a separator, remove those
        non_quote_str = s[current_pos:start]

        if len(non_quote_str) != 0:
            # exclude empty string in the end caused by last separator
            non_quote_str_parsed = non_quote_str.split(separator)[:-1]
            parse_output.extend([item.strip() for item in non_quote_str_parsed])

        if has_qoute:
            quote_str = s[start:end]
            parse_output.append(quote_str)
        else:
            # This is very last part (that's why there is no qoute)
            break

        i += 2
        current_pos = end + 1

    return parse_output

def read_csv_as_nested_dict(filename, keyfield, separator, quote):
    """
    Inputs:
      filename  - Name of CSV file
      keyfield  - Field to use as key for rows
      separator - Character that separates fields
      quote     - Character used to optionally quote fields

    Output:
      Returns a dictionary of dictionaries where the outer dictionary
      maps the value in the key_field to the corresponding row in the
      CSV file.  The inner dictionaries map the field names to the
      field values for that row.
    """
    outer_dict = {}
    
    with open(filename, 'r') as f:
        header = f.readline()
        if header[-1] != separator:
            header = header + separator
        columns = [s.strip() for s in header.split(separator)]
        
        for line in f.readlines():
            if len(line.strip()) == 0:
                continue
            # Make sure the line ends with a separator
            # That way, we can assume each field in the line is followed by a separator
            if line[-1] != separator:
                line = line + separator
            fields = parse_str(line, separator, quote)
            inner_dict = dict(zip(columns, fields))
            key = inner_dict[keyfield]
            outer_dict[key] = inner_dict
            
    return outer_dict
            
            


In [89]:
s = ''.join(['Field1,Field2,Field3,Field4\n', '1,2,3,4\n', '5,6,7,8\n', '9,10,11,12\n'])

In [91]:
f = open('table.csv', 'w')
f.write(s)
f.close()


In [92]:
cat table.csv

Field1,Field2,Field3,Field4
1,2,3,4
5,6,7,8
9,10,11,12


In [97]:
l = [70, 105, 101, 108, 100, 49, 44, 70, 105, 101, 108, 100, 50, 44, 70, 105, 101, 108, 100, 51, 44, 70, 105, 101, 108, 100, 52, 10, 49, 44, 50, 44, 51, 44, 52, 10, 53, 44, 54, 44, 55, 44, 56, 10, 57, 44, 49, 48, 44, 49, 49, 44, 49, 50, 10]

In [101]:
chr(10)

'\n'

In [102]:
''.join([chr(i) for i in l])

'Field1,Field2,Field3,Field4\n1,2,3,4\n5,6,7,8\n9,10,11,12\n'

In [60]:
nested_dict = read_csv_as_nested_dict('isp_gdp.csv', 'Country Name', ',', '"')

In [94]:
read_csv_as_nested_dict('table.csv', 'Field2', ',', '"')

{'10': {'Field1': '9', 'Field2': '10', 'Field3': '11', 'Field4': '12'},
 '2': {'Field1': '1', 'Field2': '2', 'Field3': '3', 'Field4': '4'},
 '6': {'Field1': '5', 'Field2': '6', 'Field3': '7', 'Field4': '8'}}

In [96]:
read_csv_as_nested_dict('table.csv', 'Field1', ',', '"')

{'1': {'Field1': '1', 'Field2': '2', 'Field3': '3', 'Field4': '4'},
 '5': {'Field1': '5', 'Field2': '6', 'Field3': '7', 'Field4': '8'},
 '9': {'Field1': '9', 'Field2': '10', 'Field3': '11', 'Field4': '12'}}

In [86]:
read_csv_as_nested_dict('table2.csv', 'Field2', ',', '"')
"""
expected {'6': {'Field1': '5', 'Field2': '6', 'Field4': '8', 'Field3': '7'}, '2': {'Field1': '1', 'Field2': '2', 'Field4': '4', 'Field3': '3'}, '10': {'Field1': '9', 'Field2': '10', 'Field4': '12', 'Field3': '11'}} but received (Exception: KeyError) "'Field2'" at line 75, in read_csv_as_nested_dict
"""

IOError: [Errno 2] No such file or directory: 'table2.csv'

In [84]:
 
            
def build_plot_values(gdpinfo, gdpdata):
    """
    Inputs:
      gdpinfo - GDP data information dictionary
      gdpdata - A single country's GDP stored in a dictionary whose
                keys are strings indicating a year and whose values
                are strings indicating the country's corresponding GDP
                for that year.

    Output: 
      Returns a list of tuples of the form (year, GDP) for the years
      between "min_year" and "max_year", inclusive, from gdpinfo that
      exist in gdpdata.  The year will be an integer and the GDP will
      be a float.
    """
    
    #gdp = read_csv_as_nested_dict(gdpinfo['gdpfile'], gdpinfo['country_name'], gdpinfo['separator'], gdpinfo['quote'])
    
    min_year = gdpinfo['min_year']
    max_year = gdpinfo['max_year']
    year_gdp = []

    for y, gdp in gdpdata.items():
        try:
            y_int = int(y)
            if min_year <= y_int <= max_year and gdp.strip() != '':
                year_gdp.append((y_int, float(gdp)))
        except ValueError:
            continue
            
    year_gdp = sorted(year_gdp, key=lambda x : x[0])
    return year_gdp  

In [88]:
{}["a"]

KeyError: 'a'

In [85]:
build_plot_values({'separator': '', 'quote': '', 'country_name': 'Country Name', 'gdpfile': '', 'country_code': 'Code', 'min_year': 2001, 'max_year': 2015}, 
                  {'2011': '10', '2013': '', '2008': '7', '2006': '5', '2012': '11', '2015': '14', '2010': '', '2009': '8', '2002': '1', '2007': '', '2001': '', '2014': '13', '2003': '2', '2004': '', '2005': '4'}) 

[(2002, 1.0),
 (2003, 2.0),
 (2005, 4.0),
 (2006, 5.0),
 (2008, 7.0),
 (2009, 8.0),
 (2011, 10.0),
 (2012, 11.0),
 (2014, 13.0),
 (2015, 14.0)]

In [47]:
df[df['Country Code'] == 'COG']

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
41,"Congo, Rep.",COG,GDP (current US$),NY.GDP.MKTP.CD,131731862.6,151675739.2,166521239.9,172233430.9,185693724.8,198318063.9,...,8394688000.0,11859010000.0,9593537000.0,12007880000.0,14425610000.0,13677930000.0,14085850000.0,14177440000.0,8553155000.0,


In [58]:
del quote_str

In [50]:
nested_dict

{'"Bahamas, The"': {'1960': '169802257.8',
  '1961': '190096176.6',
  '1962': '212252822.2',
  '1963': '237742768.5',
  '1964': '266664053.7',
  '1965': '300392162.5',
  '1966': '340000006.4',
  '1967': '390196085.7',
  '1968': '444901969.1',
  '1969': '528137264.8',
  '1970': '538423167.5',
  '1971': '573400000',
  '1972': '590900000',
  '1973': '670900000',
  '1974': '632400000',
  '1975': '596200000',
  '1976': '642100000',
  '1977': '713000000',
  '1978': '832400000',
  '1979': '1139800100',
  '1980': '1335300000',
  '1981': '1426500000',
  '1982': '1578300000',
  '1983': '1732800000',
  '1984': '2041100000',
  '1985': '2320699900',
  '1986': '2472500000',
  '1987': '2713999900',
  '1988': '2817900000',
  '1989': '3062000000',
  '1990': '3166000000',
  '1991': '3111160000',
  '1992': '3109000000',
  '1993': '3092000000',
  '1994': '3259000000',
  '1995': '3429000000',
  '1996': '3609000000',
  '1997': '4961119000',
  '1998': '5353524000',
  '1999': '6019972000',
  '2000': '63275520