In [109]:
from pyspark import SparkContext
from pyspark.sql import SQLContext
from pyspark.sql import SparkSession

import csv

import datetime as dt
import re

In [111]:
sc = SparkContext()

ValueError: Cannot run multiple SparkContexts at once; existing SparkContext(app=pyspark-shell, master=local[*]) created by __init__ at <ipython-input-2-2dfc28fca47d>:1 

In [90]:
file = 'segmentid_fiscalyear_dummy.csv'

# to skip header
data = sc.textFile(file)
header = data.first()

res = sc.textFile(file) \
        .filter(lambda x: x != header) \
        .mapPartitions(lambda x: csv.reader(x)) \
        .map(lambda x: ((x[0], dt.datetime.strptime(x[1], '%m/%d/%Y').year), 1)) \
        .reduceByKey(lambda x, y: x + y) \
        .sortByKey(True, 1) \
        .collect()

In [91]:
res

[(('1000', 2018), 2),
 (('1000', 2019), 3),
 (('2000', 2016), 1),
 (('2000', 2017), 2),
 (('3000', 2016), 1),
 (('3000', 2017), 2),
 (('3000', 2018), 3),
 (('4000', 2017), 3),
 (('4000', 2018), 2),
 (('4000', 2019), 1)]

In [85]:
def ols(data):
    """ data = [(x1, y1), ..., (xi, yi), ..., (xN, yN)] """
    x_bar = sum([d[0] for d in data])/len(data)
    y_bar = sum([d[1] for d in data])/len(data)
    numerator = sum([(d[0] - x_bar)*(d[1] - y_bar) for d in data])
    denomenator = sum([(d[0] - x_bar)**2 for d in data])
    return numerator/denomenator

In [88]:
data = [(2015, 100), (2016, 200), (2017, 300), (2018, 400), (2019, 500)]
ols(data)
data = [(2015, 500), (2016, 400), (2017, 300), (2018, 200), (2019, 100)]
ols(data)

-100.0

In [84]:
# return coefficient 
sc.textFile(file) \
  .filter(lambda x: x != header) \
  .mapPartitions(lambda x: csv.reader(x)) \
  .map(lambda x: ((x[0], dt.datetime.strptime(x[1], '%m/%d/%Y').year), 1)) \
  .reduceByKey(lambda x, y: x + y) \
  .sortByKey(True, 1) \
  .map(lambda x: (x[0][0], (x[0][1], x[1]))) \
  .collect()

[('1000', (2018, 2)),
 ('1000', (2019, 3)),
 ('2000', (2016, 1)),
 ('2000', (2017, 2)),
 ('3000', (2016, 1)),
 ('3000', (2017, 2)),
 ('3000', (2018, 3)),
 ('4000', (2017, 3)),
 ('4000', (2018, 2)),
 ('4000', (2019, 1))]

In [92]:
file = 'segmentid_fiscalyear_dummy.csv'

# to skip header
data = sc.textFile(file)
header = data.first()

# return coefficient 
sc.textFile(file) \
  .filter(lambda x: x != header) \
  .mapPartitions(lambda x: csv.reader(x)) \
  .map(lambda x: ((x[0], dt.datetime.strptime(x[1], '%m/%d/%Y').year), 1)) \
  .reduceByKey(lambda x, y: x + y) \
  .sortByKey(True, 1) \
  .map(lambda x: (x[0][0], [(x[0][1], x[1])])) \
  .reduceByKey(lambda x, y: x + y) \
  .mapValues(lambda x: ols(x)) \
  .collect()

[('1000', 1.0), ('2000', 1.0), ('3000', 1.0), ('4000', -1.0)]

In [94]:
file = 'segmentid_fiscalyear_dummy.csv'

# to skip header
data = sc.textFile(file)
header = data.first()

# return coefficient 
sc.textFile(file) \
  .filter(lambda x: x != header) \
  .mapPartitions(lambda x: csv.reader(x)) \
  .map(lambda x: ((x[0], dt.datetime.strptime(x[1], '%m/%d/%Y').year), 1)) \
  .reduceByKey(lambda x, y: x + y) \
  .sortByKey(True, 1) \
  .map(lambda x: (x[0][0], [(x[0][1], x[1])])) \
  .reduceByKey(lambda x, y: x + y) \
  .mapValues(lambda x: x + [('OLS_COEF', ols(x))]) \
  .collect()

[('1000', [(2018, 2), (2019, 3), ('OLS_COEF', 1.0)]),
 ('2000', [(2016, 1), (2017, 2), ('OLS_COEF', 1.0)]),
 ('3000', [(2016, 1), (2017, 2), (2018, 3), ('OLS_COEF', 1.0)]),
 ('4000', [(2017, 3), (2018, 2), (2019, 1), ('OLS_COEF', -1.0)])]

In [98]:
def fill_zer0(row):
    expected = {2015: 0, 2016:0, 2017:0, 2018:0, 2019:0}
    for x in row:
        expected[x[0]] += x[1]
    expected = [(k, v) for k, v in expected.items()]
    return expected

In [99]:
data = [(2018, 2), (2019, 3)]
fill_zer0(data)

[(2015, 0), (2016, 0), (2017, 0), (2018, 2), (2019, 3)]

In [100]:
file = 'segmentid_fiscalyear_dummy.csv'

# to skip header
data = sc.textFile(file)
header = data.first()

# return coefficient 
sc.textFile(file) \
  .filter(lambda x: x != header) \
  .mapPartitions(lambda x: csv.reader(x)) \
  .map(lambda x: ((x[0], dt.datetime.strptime(x[1], '%m/%d/%Y').year), 1)) \
  .reduceByKey(lambda x, y: x + y) \
  .sortByKey(True, 1) \
  .map(lambda x: (x[0][0], [(x[0][1], x[1])])) \
  .reduceByKey(lambda x, y: x + y) \
  .mapValues(lambda x: fill_zer0(x) + [('OLS_COEF', ols(x))]) \
  .collect()

[('1000',
  [(2015, 0), (2016, 0), (2017, 0), (2018, 2), (2019, 3), ('OLS_COEF', 1.0)]),
 ('2000',
  [(2015, 0), (2016, 1), (2017, 2), (2018, 0), (2019, 0), ('OLS_COEF', 1.0)]),
 ('3000',
  [(2015, 0), (2016, 1), (2017, 2), (2018, 3), (2019, 0), ('OLS_COEF', 1.0)]),
 ('4000',
  [(2015, 0), (2016, 0), (2017, 3), (2018, 2), (2019, 1), ('OLS_COEF', -1.0)])]

In [120]:
def parseViolationCSV(idx, part):
    if idx == 0:
        part.next()
    for row in csv.reader(part):
        yield (row[4], row[21], row[23], row[24])
        
        
rows = sc.textFile('violation_small.csv') \
         .mapPartitionsWithIndex(parseViolationCSV) 
sqlContext = SQLContext(sc)
df = sqlContext.createDataFrame(rows, 
                ('issue_date', 'violation_county', 'house_number', 'street_name'))

Py4JJavaError: An error occurred while calling z:org.apache.spark.api.python.PythonRDD.runJob.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 142.0 failed 1 times, most recent failure: Lost task 0.0 in stage 142.0 (TID 184, localhost, executor driver): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  File "C:\Users\under\AppData\Local\Continuum\anaconda3\Lib\site-packages\pyspark\python\lib\pyspark.zip\pyspark\worker.py", line 377, in main
  File "C:\Users\under\AppData\Local\Continuum\anaconda3\Lib\site-packages\pyspark\python\lib\pyspark.zip\pyspark\worker.py", line 372, in process
  File "C:\Users\under\AppData\Local\Continuum\anaconda3\Lib\site-packages\pyspark\python\lib\pyspark.zip\pyspark\serializers.py", line 400, in dump_stream
    vs = list(itertools.islice(iterator, batch))
  File "C:\Users\under\AppData\Local\Continuum\anaconda3\lib\site-packages\pyspark\rdd.py", line 1354, in takeUpToNumLeft
    yield next(iterator)
  File "<ipython-input-120-5c8577611b3f>", line 3, in parseViolationCSV
AttributeError: 'generator' object has no attribute 'next'

	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:456)
	at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:592)
	at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:575)
	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:410)
	at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
	at scala.collection.Iterator$class.foreach(Iterator.scala:891)
	at org.apache.spark.InterruptibleIterator.foreach(InterruptibleIterator.scala:28)
	at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)
	at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)
	at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)
	at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310)
	at org.apache.spark.InterruptibleIterator.to(InterruptibleIterator.scala:28)
	at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302)
	at org.apache.spark.InterruptibleIterator.toBuffer(InterruptibleIterator.scala:28)
	at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289)
	at org.apache.spark.InterruptibleIterator.toArray(InterruptibleIterator.scala:28)
	at org.apache.spark.api.python.PythonRDD$$anonfun$3.apply(PythonRDD.scala:153)
	at org.apache.spark.api.python.PythonRDD$$anonfun$3.apply(PythonRDD.scala:153)
	at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2101)
	at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2101)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:123)
	at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
	at java.lang.Thread.run(Unknown Source)

Driver stacktrace:
	at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1891)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1879)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1878)
	at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
	at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1878)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:927)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:927)
	at scala.Option.foreach(Option.scala:257)
	at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:927)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2112)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2061)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2050)
	at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:738)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2061)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2082)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2101)
	at org.apache.spark.api.python.PythonRDD$.runJob(PythonRDD.scala:153)
	at org.apache.spark.api.python.PythonRDD.runJob(PythonRDD.scala)
	at sun.reflect.GeneratedMethodAccessor99.invoke(Unknown Source)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
	at java.lang.reflect.Method.invoke(Unknown Source)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:238)
	at java.lang.Thread.run(Unknown Source)
Caused by: org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  File "C:\Users\under\AppData\Local\Continuum\anaconda3\Lib\site-packages\pyspark\python\lib\pyspark.zip\pyspark\worker.py", line 377, in main
  File "C:\Users\under\AppData\Local\Continuum\anaconda3\Lib\site-packages\pyspark\python\lib\pyspark.zip\pyspark\worker.py", line 372, in process
  File "C:\Users\under\AppData\Local\Continuum\anaconda3\Lib\site-packages\pyspark\python\lib\pyspark.zip\pyspark\serializers.py", line 400, in dump_stream
    vs = list(itertools.islice(iterator, batch))
  File "C:\Users\under\AppData\Local\Continuum\anaconda3\lib\site-packages\pyspark\rdd.py", line 1354, in takeUpToNumLeft
    yield next(iterator)
  File "<ipython-input-120-5c8577611b3f>", line 3, in parseViolationCSV
AttributeError: 'generator' object has no attribute 'next'

	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:456)
	at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:592)
	at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:575)
	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:410)
	at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
	at scala.collection.Iterator$class.foreach(Iterator.scala:891)
	at org.apache.spark.InterruptibleIterator.foreach(InterruptibleIterator.scala:28)
	at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)
	at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)
	at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)
	at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310)
	at org.apache.spark.InterruptibleIterator.to(InterruptibleIterator.scala:28)
	at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302)
	at org.apache.spark.InterruptibleIterator.toBuffer(InterruptibleIterator.scala:28)
	at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289)
	at org.apache.spark.InterruptibleIterator.toArray(InterruptibleIterator.scala:28)
	at org.apache.spark.api.python.PythonRDD$$anonfun$3.apply(PythonRDD.scala:153)
	at org.apache.spark.api.python.PythonRDD$$anonfun$3.apply(PythonRDD.scala:153)
	at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2101)
	at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2101)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:123)
	at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
	... 1 more


In [172]:
import datetime as dt

file = 'violation_small.csv'

data = sc.textFile(file)
header = data.first()

"""
- Issue Date @ index 4
- Violation County @ index 21
- House Number @ index 23
- Street Name @ index 24
""" 


res = sc.textFile(file) \
        .filter(lambda x: x != header) \
        .mapPartitions(lambda x: csv.reader(x)) \
        .map(lambda x: (x[4], x[21], x[23], x[24])) \
        .filter(lambda x: (2015 <= dt.datetime.strptime(x[0], '%m/%d/%Y').year) and (dt.datetime.strptime(x[0], '%m/%d/%Y').year <= 2019)) \
        .collect()

In [None]:
import datetime as dt

file = 'violation_small.csv'

data = sc.textFile(file)
header = data.first()

"""
- Issue Date @ index 4
- Violation County @ index 21
- House Number @ index 23
- Street Name @ index 24
""" 


res = sc.textFile(file) \
        .filter(lambda x: x != header) \
        .mapPartitions(lambda x: csv.reader(x)) \
        .map(lambda x: (x[4], x[21], x[23], x[24])) \
        .filter(lambda x: (2015 <= dt.datetime.strptime(x[0], '%m/%d/%Y').year) and (dt.datetime.strptime(x[0], '%m/%d/%Y').year <= 2019)) \
        .collect()

In [181]:
len(res)

63

In [179]:
res[0]

(2016, 2, '2164', 'Webster Ave')

In [180]:
res = sc.textFile(file) \
        .filter(lambda x: x != header) \
        .mapPartitions(lambda x: csv.reader(x)) \
        .map(lambda x: (int(dt.datetime.strptime(x[4], '%m/%d/%Y').year), x[21], x[23], x[24])) \
        .filter(lambda x: (2015 <= x[0] and x[0] <= 2019)) \
        .map(lambda x: (x[0], countyname2borocode(x[1]), x[2], x[3])) \
        .filter(lambda x: x[1] > 0) \
        .collect()

In [175]:
def countyname2borocode(county_name):
    if (county_name == 'NEW Y') or (county_name == 'NEWY') or (county_name == 'NY') or (county_name == 'MH') or (county_name == 'MAN'):
        return 1
    elif (county_name == 'BRONX') or (county_name == 'BX'):
        return 2
    elif (county_name == 'KINGS') or (county_name == 'KING') or (county_name == 'K'):
        return 3
    elif (county_name == 'QUEEN') or (county_name == 'QU') or (county_name == 'Q'):
        return 4
    elif (county_name == 'R'):
        return 5
    else:
        return -1

In [187]:
def street_segmentid_lookup(HN, STREET_NAME, BOROCODE, physicalID_list):
    for segment in physicalID_list:
        street = STREET_NAME.lower()
        # print(type(int(segment['BOROCODE'])), type(v_record['Violation County']))
        # first check county code and street name
        if (BOROCODE == int(segment['BOROCODE'])) and \
           ((street == segment['FULL_STREE'].lower()) or (street == segment['ST_LABEL'].lower())):
           # then, check house number: odd number is stored in left
           if match_house_number(HN, segment):
                return segment['PHYSICALID']
    # returns -1 if there is no match
    return -1


def match_house_number(hn_record, segment):
    # exclude single character house numbers
    if (len(hn_record) == 1) and (not hn_record.isnumeric()):
        return False
    # if a record is empty, assigns 0
    if len(hn_record) == 0:
        hn_record = 0
    # otherwise concatenate two values together
    # example: '187-09' = 18709 <int>
    # example: '187' = 187 <int>
    else:
        hn_record = int(hn_record.replace('-', ''))
    # format house numbers in lookup segment in the same way
    # if hn_record is even, we should use 'R'; otherwise, 'L'
    if hn_record%2 == 0:
        if len(segment['R_LOW_HN']) == 0:
            lower = 0
        else:
            lower = int(re.sub('-0|-', '', segment['R_LOW_HN']))
        if len(segment['R_HIGH_HN']) == 0:
            high = 0
        else:
            high = int(re.sub('-0|-', '', segment['R_HIGH_HN']))
    else:
        if len(segment['L_LOW_HN']) == 0:
            lower = 0
        else:
            lower = int(re.sub('-0|-', '', segment['L_LOW_HN']))
        if len(segment['L_HIGH_HN']) == 0:
            high = 0
        else:
            high = int(re.sub('-0|-', '', segment['L_HIGH_HN']))
    return (lower <= hn_record) and (hn_record <= high)

In [185]:
# load lookup table
with open('data\\nyc_cscl.csv', 'r') as f:
    file = csv.DictReader(f)
    lookup = [row for row in file]

In [193]:
file = 'violation_small.csv'

data = sc.textFile(file)
header = data.first()

res = sc.textFile(file) \
        .filter(lambda x: x != header) \
        .mapPartitions(lambda x: csv.reader(x)) \
        .map(lambda x: (int(dt.datetime.strptime(x[4], '%m/%d/%Y').year), x[21], x[23], x[24])) \
        .filter(lambda x: (2015 <= x[0] and x[0] <= 2019)) \
        .map(lambda x: (x[0], countyname2borocode(x[1]), x[2], x[3])) \
        .filter(lambda x: x[1] > 0) \
        .map(lambda x: (x[0], street_segmentid_lookup(x[2], x[3], x[1], lookup))) \
        .filter(lambda x: int(x[1]) > 0) \
        .collect()

In [194]:
len(res)

49

In [195]:
def fill_zer0(row):
    expected = {2015: 0, 2016:0, 2017:0, 2018:0, 2019:0}
    for x in row:
        expected[x[0]] += x[1]
    expected = [(k, v) for k, v in expected.items()]
    return expected
    

def ols(data):
    """ data = [(x1, y1), ..., (xi, yi), ..., (xN, yN)] """
    x_bar = sum([d[0] for d in data])/len(data)
    y_bar = sum([d[1] for d in data])/len(data)
    numerator = sum([(d[0] - x_bar)*(d[1] - y_bar) for d in data])
    denomenator = sum([(d[0] - x_bar)**2 for d in data])
    if denomenator == 0:
        return 0
    else:
        return numerator/denomenator    

In [210]:
# load lookup table
with open('data\\nyc_cscl.csv', 'r') as f:
    file = csv.DictReader(f)
    lookup = [row for row in file]
    
lookup_bcast = sc.broadcast(lookup)

In [211]:
file = 'violation_small.csv'

data = sc.textFile(file)
header = data.first()

res = sc.textFile(file) \
        .filter(lambda x: x != header) \
        .mapPartitions(lambda x: csv.reader(x)) \
        .map(lambda x: (int(dt.datetime.strptime(x[4], '%m/%d/%Y').year), x[21], x[23], x[24])) \
        .filter(lambda x: (2015 <= x[0] and x[0] <= 2019)) \
        .map(lambda x: (x[0], countyname2borocode(x[1]), x[2], x[3])) \
        .filter(lambda x: x[1] > 0) \
        .map(lambda x: (x[0], street_segmentid_lookup(x[2], x[3], x[1], lookup_bcast.value))) \
        .filter(lambda x: int(x[1]) > 0) \
        .map(lambda x: ((x[1], x[0]), 1)) \
        .reduceByKey(lambda x, y: x + y) \
        .sortByKey(True, 1) \
        .map(lambda x: (x[0][0], [(x[0][1], x[1])])) \
        .reduceByKey(lambda x, y: x + y) \
        .mapValues(lambda x: fill_zer0(x) + [('OLS_COEF', ols(x))]) \
        .collect()

In [213]:
count = 0
for segment in res:
    for year in segment[1]:
        if year[0] != 'OLS_COEF':
            count += year[1]
count

49