In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv("air_quality_index.csv")

In [3]:
df.head()

Unnamed: 0,DATE,COUNTRY,CITY,VALUE
0,2019-01-01,US,San Antonio,42.0
1,2019-01-01,US,Saint Paul,21.0
2,2019-01-01,US,Denver,42.0
3,2019-01-01,US,San Francisco,13.0
4,2019-01-01,US,Madison,14.0


In [4]:
cityList = ["Los Angeles", "San Antonio", "Seattle", "Portland"]

In [5]:
dfDict = {}
for city in cityList:
    dfCity = df.loc[df['CITY']==city]
    dfCity['QUALITY'] = ""
    dfDict[city] = dfCity

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


In [6]:
def airQuality(val):
    quality = ''
    if val < 12.5:
        quality = "good"
    elif 12.5<= val < 25:
        quality = "fair"
    elif 25<=val< 50:
        quality = "poor"
    elif 50<=val < 150:
        quality = "very_poor"
    else:
        quality = "extremely_poor"
    return quality

def qualityToIndex(quality):
    index = 0
    if quality == "good":
        index = 1
    elif quality == "fair":
        index = 2
    elif quality == "poor":
        index = 3
    elif quality == "very_poor":
        index = 4
    else:
        index = 5
    return index

In [7]:
for city in dfDict:
    dfCity = dfDict[city]
    dfCity.reset_index(drop=True, inplace=True)
    for index, row in dfCity.iterrows():
        quality = airQuality(row['VALUE'])
        dfCity.at[index, 'QUALITY'] = quality

In [9]:
dfDict["Seattle"].head()

Unnamed: 0,DATE,COUNTRY,CITY,VALUE,QUALITY
0,2019-01-01,US,Seattle,63.0,very_poor
1,2019-01-02,US,Seattle,37.0,poor
2,2019-01-03,US,Seattle,30.0,poor
3,2019-01-04,US,Seattle,33.0,poor
4,2019-01-05,US,Seattle,24.0,fair


In [10]:
dictChangeAll = {}

for city in dfDict:
    dictChange = {}
    dfCity = dfDict[city]

    for index, row in dfCity.iterrows():
        if index == 0:
            continue
        prevState = dfCity.at[index-1, 'QUALITY']
        currentState = dfCity.at[index, 'QUALITY']
        key = prevState + ':' + currentState

        dictChange[key] = dictChange.get(key, 0) + 1
    
    dictChangeAll[city] = dictChange

In [11]:
dictChangeAll["Seattle"]

{'very_poor:poor': 29,
 'poor:poor': 155,
 'poor:fair': 123,
 'fair:poor': 122,
 'fair:fair': 329,
 'fair:very_poor': 18,
 'poor:very_poor': 24,
 'very_poor:very_poor': 17,
 'very_poor:fair': 15,
 'fair:good': 17,
 'good:fair': 18,
 'poor:good': 5,
 'good:poor': 4,
 'good:good': 7,
 'fair:extremely_poor': 1,
 'extremely_poor:extremely_poor': 4,
 'extremely_poor:very_poor': 1,
 'poor:extremely_poor': 2,
 'extremely_poor:fair': 2}

In [12]:
matrixDict = {}

for city in dictChangeAll:
    dictChange = dictChangeAll[city]
    matrix= [ [0 for i in range(5)] for i in range(5) ]
    
    for key in dictChange:
        value = dictChange[key]
        prevState, toState = key.split(':')
        prevIndex = qualityToIndex(prevState)
        toIndex = qualityToIndex(toState)
        matrix[prevIndex-1][toIndex-1] = value
    
    for i in range(len(matrix)):
        totalRow = sum(matrix[i])
        if totalRow == 0:
            continue
        for j in range(len(matrix[i])):
            matrix[i][j] /= totalRow
    
    npMatrix = np.array(matrix)
    
    matrixDict[city] = npMatrix

In [16]:
matrixDict["Portland"]

array([[0.33333333, 0.54263566, 0.08527132, 0.03875969, 0.        ],
       [0.13016529, 0.65909091, 0.17975207, 0.0268595 , 0.00413223],
       [0.09009009, 0.36936937, 0.45945946, 0.07207207, 0.00900901],
       [0.04      , 0.24      , 0.44      , 0.28      , 0.        ],
       [0.125     , 0.25      , 0.        , 0.125     , 0.5       ]])

In [18]:
# Using 1000 iteration

for city in matrixDict:
    matrix = matrixDict[city]
    
    initialState = np.array([[0,1,0,0,0]]).transpose()
    prevState = initialState
    for i in range(1):
        nextState = np.matmul(matrix.transpose(), prevState)
        prevState = nextState

    print(city, "\n", nextState, "\n\n")

Los Angeles 
 [[0.01639344]
 [0.26229508]
 [0.50819672]
 [0.21311475]
 [0.        ]] 


San Antonio 
 [[0.03017241]
 [0.40948276]
 [0.49568966]
 [0.06465517]
 [0.        ]] 


Seattle 
 [[0.0349076 ]
 [0.67556468]
 [0.25051335]
 [0.03696099]
 [0.00205339]] 


Portland 
 [[0.13016529]
 [0.65909091]
 [0.17975207]
 [0.0268595 ]
 [0.00413223]] 




In [115]:
# Using steady state condition

for city in matrixDict:
    matrix = matrixDict[city]
    currMatrix = matrix.copy().transpose()
    
    for i in range(5):
        currMatrix[i][i] -= 1
    for i in range(5):
        currMatrix[4][i] = 1
    
    inverseMatrix = np.linalg.inv(currMatrix)
    
    Y = np.array([[0,0,0,0,1]]).transpose()
    
    steadyState = np.matmul( inverseMatrix, Y )
    
    print(city, "\n", steadyState, "\n\n")

Los Angeles 
 [[0.00223964]
 [0.06830907]
 [0.42441209]
 [0.50279955]
 [0.00223964]] 


San Antonio 
 [[0.02134831]
 [0.26067416]
 [0.55168539]
 [0.16516854]
 [0.0011236 ]] 


Seattle 
 [[0.03251151]
 [0.54566026]
 [0.34708233]
 [0.0668897 ]
 [0.00785619]] 


Portland 
 [[0.14465926]
 [0.54389714]
 [0.24801934]
 [0.05446042]
 [0.00896383]] 


