In [148]:
# Mount Google Drive at /content/drive so files stored there can be read from this notebook.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [149]:
from google.colab import files
import pandas as pd
import os
import csv
import re

In [150]:
# Work from the folder that holds the CSV files for all states, so that later
# cells can list and open them by bare file name. Adjust the path to your own Drive layout.
os.chdir('/content/drive/MyDrive/Ling55AC/states')

In [151]:
# One entry per state data file in the working directory, in sorted (reproducible) order.
# Skips sub-directories, hidden entries, and 'new_csv.csv' -- the scratch copy that
# to_list() writes into this same directory -- so a re-run never counts it as a state.
state_files = sorted(
    entry
    for entry in os.listdir()
    if os.path.isfile(entry)
    and not entry.startswith('.')
    and entry != 'new_csv.csv'
)

In [152]:
def to_list(this_path, skip_rows=(1, 2, 3, 4)):
  """
  Reads a CSV file into a list of rows, leaving out the rows listed in skip_rows.

  Args:
    this_path: path of the CSV file to read.
    skip_rows: 1-based row numbers to drop. Defaults to the first four rows
      (presumably title/header lines of the state tables -- confirm against the data).

  Returns:
    A list of lists; each inner list holds the string cells of one kept row,
    in file order. Blank lines in the file come back as empty lists.

  Side effect:
    The kept rows are also written to 'new_csv.csv' in the current working
    directory, overwriting any previous copy.
  """
  skipped = frozenset(skip_rows)

  # newline='' hands line-ending handling to the csv module, as its documentation
  # requires: line breaks inside quoted cells are preserved on read, and no extra
  # blank lines are produced on write under Windows.
  with open(this_path, 'r', newline='') as read_file:
    lines = [
        row
        for row_number, row in enumerate(csv.reader(read_file), start=1)
        if row_number not in skipped
    ]

  with open('new_csv.csv', 'w', newline='') as write_file:
    csv.writer(write_file).writerows(lines)
  return lines

In [153]:
def get_lang_list(lang_name, lang_matrix):
  """
  Finds the first row of lang_matrix whose first cell matches lang_name.

  Args:
    lang_name: language to look for. It is used as a regular-expression pattern
      and may match anywhere inside the cell; pass re.escape(name) for names
      that contain regex metacharacters such as parentheses.
    lang_matrix: iterable of rows (lists of strings), one row per CSV line.

  Returns:
    The first matching row, or None when no row matches.
  """
  # Compile once instead of re-parsing the pattern for every row. search() already
  # scans the whole cell, so no leading ".*" is needed.
  pattern = re.compile(lang_name)
  for lang_list in lang_matrix:
    # csv.reader yields [] for blank lines; those rows have no first cell to test.
    if not lang_list:
      continue
    if pattern.search(lang_list[0]) is not None:
      return lang_list
  return None

In [154]:
def _speaker_count(lang_list):
  """Converts the second cell of a language row into an int; 0 when there is no usable count."""
  # No matching row, or a row without a count column: treat as no speakers.
  if lang_list is None or len(lang_list) < 2:
    return 0
  raw = lang_list[1].strip()
  # '(D)' appears in place of a number (presumably a suppressed-data marker -- confirm);
  # it and blank cells are counted as 0.
  if not raw or raw == '(D)':
    return 0
  # Accept thousands separators such as "1,234".
  return int(raw.replace(',', ''))


def get_all_states_speakers(lang_name):
  """
  Iterates through all the state files and finds the number of lang_name speakers in each.

  Reads the module-level state_files list and loads each file with to_list().

  Args:
    lang_name: language to look up; forwarded to get_lang_list().

  Returns:
    A list of (file name, number of speakers) tuples, one per entry of
    state_files and in the same order. The count is 0 when the language is
    not listed in a file or its count is '(D)' or blank.

  Raises:
    ValueError: if a count cell holds any other non-numeric text.
  """
  result = []
  for file in state_files:
    lang_list = get_lang_list(lang_name, to_list(file))
    result.append((file, _speaker_count(lang_list)))
  return result

In [160]:
def sorted_states(lang_name):
  """
  Ranks the states by how many lang_name speakers each one has.

  Collects the (state, speaker count) pairs from get_all_states_speakers()
  and returns them as a new list ordered from the smallest count to the
  largest. Pairs with equal counts keep their original relative order.
  """
  def speaker_count(entry):
    # The count is the second element of each pair.
    return entry[1]

  ranked = list(get_all_states_speakers(lang_name))
  ranked.sort(key=speaker_count)
  return ranked

In [161]:
# Per-state speaker counts for three languages. Each result is a list of
# (state file name, speaker count) tuples sorted by count, smallest first.
nepali = sorted_states("Nepali")
navajo = sorted_states("Navajo")
yiddish = sorted_states("Yiddish")