# Assignment - Python Warmup
Please follow the instructions in the assignment's PDF and fill in the code for each function.

## Exercise 1

In [41]:
import pandas as pd
import numpy as np

In [42]:
# for each sentence in file, append word in specified position
def positionWord(filePath, position):
  """Collect the word at index `position` from every line of a text file.

  Each line is treated as one sentence. It is split on runs of whitespace,
  the word at `position` is taken using normal Python indexing (0 is the
  first word, negative values count from the end), and leading/trailing
  punctuation is stripped from that word.

  Args:
    filePath: path of the text file to read.
    position: index of the word to take from each line.

  Returns:
    A list with one entry for every line that has a word at `position`.
    Blank lines and lines that are too short are skipped rather than
    raising IndexError.
  """
  import string

  res = []
  with open(filePath, 'r') as f:
    for line in f:
      # split() with no argument collapses repeated spaces and tabs, so a
      # double space cannot create an empty "word" that shifts positions.
      words = line.split()
      try:
        word = words[position]
      except IndexError:
        continue  # nothing at this position on a blank or short line
      res.append(word.strip(string.punctuation))
  return res

In [43]:
# positionWord("example.txt", 3)

## Exercise 2

In [44]:
# given text of potentially lower- and upper-case, return only uppercase characters
def onlyUpperCase(text):
  """Return the uppercase characters of `text`, in their original order.

  A character is kept when its `isupper()` method returns True; every other
  character is dropped. An empty input gives an empty string.
  """
  return ''.join(ch for ch in text if ch.isupper())

In [45]:
# onlyUpperCase("lorem ipsum Dolor Sit amet, conSectetur.")

## Exercise 3

In [46]:
# return the multiples of num2 that lie between 1 and num1 (inclusive), i.e. the numbers in that range divisible by num2
def divisors(num1, num2):
  """Return the integers in [1, num1] that are divisible by num2, ascending.

  Args:
    num1: inclusive upper bound of the range to search.
    num2: the number the results must be divisible by. Its sign is ignored,
      because n is divisible by -k exactly when it is divisible by k.

  Returns:
    A list of the multiples of |num2| from |num2| up to num1; empty when
    num1 is smaller than |num2|.

  Raises:
    ValueError: if num2 is 0, since nothing is divisible by zero.
  """
  if num2 == 0:
    raise ValueError("num2 must be non-zero")
  # Stepping by |num2| from |num2| visits exactly the multiples; a negative
  # step would make the range empty instead of counting upwards.
  step = abs(num2)
  return list(range(step, num1 + 1, step))

In [47]:
# print(divisors(26, 6))
# print(divisors(2,2))
# print(divisors(15,1))

## Exercise 4

In [48]:
# return dictionary of word counts given list of words
def countWords(wordList):
  """Count how many times each word occurs in `wordList`.

  Returns a dict mapping every distinct word to its number of occurrences.
  Keys appear in the order in which the words are first seen.
  """
  counts = {}
  for token in wordList:
    if token in counts:
      counts[token] += 1
    else:
      counts[token] = 1
  return counts

In [49]:
# countWords(["the", "seething", "sea", "ceaseth", "and", "thus", "the", "seething", "sea", "sufficeth", "us"])

## Exercise 5

In [50]:
# Given list of words, return dictionary of word counts by length of word
def organizedCountWords(wordList):
  """Count word occurrences, grouped by word length.

  Returns a dict whose keys are word lengths and whose values are dicts
  mapping each word of that length to the number of times it occurs in
  `wordList`. Lengths and words appear in first-seen order.
  """
  by_length = {}
  for token in wordList:
    # setdefault creates the bucket for this length the first time it is seen
    bucket = by_length.setdefault(len(token), {})
    bucket[token] = bucket.get(token, 0) + 1
  return by_length

In [51]:
# Worked example: the cell output below is the length-keyed word count for this list.
organizedCountWords(["the", "seething", "sea", "ceaseth", "and", "thus", "the", "seething", "sea", "sufficeth", "us"])

{3: {'the': 2, 'sea': 2, 'and': 1},
 8: {'seething': 2},
 7: {'ceaseth': 1},
 4: {'thus': 1},
 9: {'sufficeth': 1},
 2: {'us': 1}}

## Exercise 6

In [52]:
# Compute average price of rooms in neighbourhood according to provided filters
def AvgNeighbourhoodListingPrice(neighbourhoodList, room_type, number_of_reviews, minimum_nights, listings=None):
  """Average listing price per neighbourhood for listings that pass the filters.

  Args:
    neighbourhoodList: neighbourhood names to keep.
    room_type: only listings whose 'room_type' equals this value are kept.
    number_of_reviews: keep listings with at least this many reviews.
    minimum_nights: keep listings whose 'minimum_nights' is at most this value.
    listings: optional DataFrame with the columns 'neighbourhood',
      'room_type', 'number_of_reviews', 'minimum_nights' and 'price'. When
      omitted, the Toronto Airbnb CSV is downloaded, as before. The frame
      passed in is not modified.

  Returns:
    A DataFrame indexed by 'neighbourhood' with a single 'price' column
    holding the mean price rounded to 2 decimals, sorted from most to least
    expensive. Neighbourhoods with no matching listing do not appear.
  """
  if listings is None:
    listings = pd.read_csv('https://raw.githubusercontent.com/MIE451-2021/course-datasets/master/toronto_airbnb_listings_Aug2019.csv')

  # One combined mask instead of four successive reassignments of the frame.
  keep = (
      listings['neighbourhood'].isin(neighbourhoodList)
      & (listings['room_type'] == room_type)
      & (listings['number_of_reviews'] >= number_of_reviews)
      & (listings['minimum_nights'] <= minimum_nights)
  )

  return (
      listings.loc[keep]
      .groupby('neighbourhood')[['price']]  # double brackets keep a one-column DataFrame
      .mean()
      .round(2)
      .sort_values(by='price', ascending=False)
  )

In [53]:
# Worked example: entire homes/apartments with at least 30 reviews and a minimum stay of at most 3 nights.
AvgNeighbourhoodListingPrice(["Edenbridge-Humber Valley","Annex", "The Beaches"],"Entire home/apt", 30, 3)

Unnamed: 0_level_0,price
neighbourhood,Unnamed: 1_level_1
Edenbridge-Humber Valley,453.33
Annex,141.99
The Beaches,126.24


## Exercise 7

In [54]:
# Solve for coefficients for least squares regression problem
def leastSquaresFit(urlX,urlY):
  """Fit a linear least-squares model with an intercept term.

  Args:
    urlX: location of a header-less CSV of features, one sample per row
      (anything `pd.read_csv` accepts: URL, path or file-like object).
    urlY: location of a header-less CSV of targets, one row per sample.

  Returns:
    A numpy array of coefficients with one row per column of the design
    matrix. Row 0 is the intercept; the remaining rows follow the feature
    columns in order.

  Raises:
    numpy.linalg.LinAlgError: if X^T X is singular.
  """
  features = pd.read_csv(urlX, header=None).to_numpy()
  targets = pd.read_csv(urlY, header=None).to_numpy()

  # Prepend a column of ones so the first coefficient acts as the intercept.
  X = np.hstack((np.ones((features.shape[0], 1)), features))
  XT = X.T

  # Solve the normal equations (X^T X) w = X^T y directly. This avoids
  # forming an explicit inverse, which is slower and less accurate.
  return np.linalg.solve(XT @ X, XT @ targets)

In [55]:
# Course-hosted CSVs: URLX is read as the feature matrix, URLY as the targets.
URLX = 'https://raw.githubusercontent.com/MIE451-2021/course-datasets/master/X.csv'
URLY = 'https://raw.githubusercontent.com/MIE451-2021/course-datasets/master/Y.csv'
# The returned coefficient array is displayed as the cell output.
leastSquaresFit(URLX,URLY)

array([[1.15740465e-03],
       [7.85819959e+01],
       [6.86835383e+01],
       [6.64017869e+01]])

----------
## Validators
The following cells provide basic validation of your functions.

You should run each cell and make sure you do not get an exception.

**IMPORTANT: passing these validators does not mean your code is correct. These are basic validators to make sure the interface of your functions is correct.**

In [56]:
# Interface check only: positionWord must return a list.
# Reads "example.txt" relative to the notebook's working directory.
assert isinstance(positionWord("example.txt", 2), list)

In [57]:
# Interface check only: onlyUpperCase must return a str.
assert isinstance(
    onlyUpperCase("lorem ipsum Dolor Sit amet, conSectetur."),
    str,
)

In [58]:
# Interface check only: divisors must return a list.
assert isinstance(divisors(23, 6), list)

In [59]:
# Interface check only: countWords must return a dict.
words = [
    "the", "seething", "sea", "ceaseth", "and", "thus",
    "the", "seething", "sea", "sufficeth", "us",
]
assert isinstance(countWords(words), dict)

In [60]:
# Interface check only: organizedCountWords must return a dict.
words = [
    "the", "seething", "sea", "ceaseth", "and", "thus",
    "the", "seething", "sea", "sufficeth", "us",
]
assert isinstance(organizedCountWords(words), dict)

In [61]:
# Interface check only: four requested neighbourhoods should give a 4-row,
# 1-column result. `df` is kept as a notebook global for a later check.
df = AvgNeighbourhoodListingPrice(
    ["Little Portugal", "Waterfront Communities-The Island", "Rosedale-Moore Park", "Kensington-Chinatown"],
    "Private room",
    50,
    4,
)
assert df.shape == (4, 1)

In [62]:
import numpy as np
# Interface check only: leastSquaresFit must return a numpy array.
w = leastSquaresFit(
    "https://raw.githubusercontent.com/MIE451-2022/course-datasets/main/X.csv",
    "https://raw.githubusercontent.com/MIE451-2022/course-datasets/main/Y.csv",
)
assert isinstance(w, np.ndarray)

In [63]:
import pandas as pd
# Relies on `df` assigned by the AvgNeighbourhoodListingPrice validator cell
# earlier in the notebook; that cell must have been run first.
assert isinstance(df, pd.DataFrame)