<a href="https://colab.research.google.com/github/efg5432/Cathay/blob/main/Cathay.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os 
import pandas as pd
import re

In [None]:
class DataframeAll:
    """Load every ``*_lvr_land_*`` CSV found under ``<path>/Data`` into one frame.

    The loader walks ``<path>/Data/<folder>/<entry>``.  Inside each folder an
    entry whose name is exactly five characters long is treated as the period
    marker: its first three characters become the year and its last character
    the quarter (presumably names such as ``108S1`` -- confirm against the
    actual data layout).

    NOTE(review): if the period is really encoded in the *folder* name rather
    than in an entry inside the folder, the marker test must look at the
    folder name instead; this cannot be determined from the code alone.
    """

    # Entries whose name contains this pattern are read as data files.
    _LAND_FILE = re.compile(r'\w_lvr_land_\w')

    def __init__(self, path):
        """Remember the project root; data is read from ``<path>/Data``."""
        self.path = path

    def createDf(self, filePath, year, quarter):
        """Read one CSV and tag every row with a ``df_name`` label.

        Args:
            filePath: Path of the CSV file.  The second line of the file is
                used as the header row (``header=1``).
            year: Year component of the label.
            quarter: Quarter component of the label.

        Returns:
            The loaded DataFrame with an extra ``df_name`` column of the form
            ``{year}_{quarter}_{first}_{last}``, where ``first`` and ``last``
            are the first and last ``_``-separated parts of the file name
            without its extension.
        """
        stem = os.path.splitext(os.path.basename(filePath))[0]
        parts = stem.split('_')
        dataframe = pd.read_csv(filePath, header=1)
        dataframe['df_name'] = f'{year}_{quarter}_{parts[0]}_{parts[-1]}'
        return dataframe

    def readAllData(self):
        """Read and concatenate every matching CSV below ``<path>/Data``.

        Returns:
            One DataFrame holding the rows of all matching files.

        Raises:
            ValueError: If a data file is found before any period marker has
                been seen, or if no data file is found at all.
        """
        dataRoot = os.path.join(self.path, 'Data')
        year = quarter = None
        frames = []
        # os.listdir returns entries in arbitrary order; sort for a
        # reproducible row order.
        for folder in sorted(os.listdir(dataRoot)):
            folderPath = os.path.join(dataRoot, folder)
            if not os.path.isdir(folderPath):
                # Stray files directly under Data (e.g. notes) are ignored.
                continue
            entries = sorted(os.listdir(folderPath))

            # First pass: pick up the period marker, so that the label does
            # not depend on whether the marker is listed before the CSVs.
            # A folder without a marker keeps the previously seen period.
            for entry in entries:
                if len(entry) == 5:
                    year, quarter = entry[:3], entry[-1]

            # Second pass: load the data files of this folder.
            for entry in entries:
                if self._LAND_FILE.search(entry) is None:
                    continue
                if year is None:
                    raise ValueError(
                        f'no 5-character period marker found for {folderPath!r}'
                    )
                frames.append(
                    self.createDf(os.path.join(folderPath, entry), year, quarter)
                )

        if not frames:
            raise ValueError(f'no *_lvr_land_* files found under {dataRoot!r}')
        return pd.concat(frames)


In [None]:
class Filter:
    """Derive numeric helper columns and select a fixed subset of rows.

    On construction two columns are added **in place** to the DataFrame that
    was passed in:

    * ``berth``    -- last number found in ``'transaction pen number'``.
    * ``floorNum`` -- ``'total floor number'`` converted to an integer.
    """

    # Chinese digit characters and their values.
    _DIGITS = {'一': 1, '二': 2, '三': 3, '四': 4, '五': 5,
               '六': 6, '七': 7, '八': 8, '九': 9}
    # Chinese multiplier characters and their values.
    _UNITS = {'十': 10, '百': 100}

    def __init__(self, df):
        """Store ``df`` and add the ``berth`` / ``floorNum`` columns to it."""
        self.df = df
        self.df['berth'] = self.df['transaction pen number'].apply(self.parkingNum)
        self.df['floorNum'] = self.df['total floor number'].apply(self.cn2num)

    def getDataframe(self):
        """Return the (augmented) DataFrame held by this object."""
        return self.df

    def getFilter(self):
        """Return the rows matching all three selection criteria.

        A row is kept when ``'main use'`` is ``'住家用'``, ``floorNum`` is at
        least 13 and ``'building state'`` is ``'住宅大樓(11層含以上有電梯)'``.
        """
        isResidential = self.df['main use'] == '住家用'
        isTallEnough = self.df['floorNum'] >= 13
        isTower = self.df['building state'] == '住宅大樓(11層含以上有電梯)'
        return self.df[isResidential & isTallEnough & isTower]

    def saveFilterToCsv(self, path):
        """Write the filtered rows to ``<path>/Output/filter.csv``.

        The ``Output`` directory is created when it does not exist yet.
        """
        outputDir = os.path.join(path, 'Output')
        os.makedirs(outputDir, exist_ok=True)
        self.getFilter().to_csv(os.path.join(outputDir, 'filter.csv'))

    def parkingNum(self, transactionPenNumber):
        """Return the last run of ASCII digits in the text as an ``int``.

        The value is presumably of the form ``'土地1建物1車位2'`` so that the
        last number is the parking-space count -- confirm against the data.

        Returns:
            The last number as ``int``, or ``None`` when the value is not a
            string (e.g. NaN for a missing cell) or contains no digits.
        """
        if not isinstance(transactionPenNumber, str):
            return None
        numbers = re.findall(r'[0-9]+', transactionPenNumber)
        return int(numbers[-1]) if numbers else None

    def cn2num(self, floorNum):
        """Convert a floor count to ``int``.

        Accepted inputs:

        * numbers (``int`` or non-NaN ``float``) -- returned as ``int``;
        * decimal strings such as ``'13'`` or ``'13層'``;
        * Chinese numerals up to the hundreds, with or without a trailing
          ``'層'``, e.g. ``'十三層'`` -> 13, ``'一百零一層'`` -> 101.

        Characters that are neither a digit nor a multiplier are ignored, so
        text without any numeral yields 0.

        Returns:
            The floor count, or ``None`` for missing values (``None``, NaN,
            empty string) and non-finite numbers.
        """
        if not isinstance(floorNum, str):
            # Expected to be a scalar cell value here.
            if pd.isna(floorNum):
                return None
            try:
                return int(floorNum)
            except (TypeError, ValueError, OverflowError):
                return None

        text = floorNum.strip()
        # Drop the "floor" suffix only when it is actually present, so that
        # values without it do not lose their last numeral.
        if text.endswith('層'):
            text = text[:-1]
        if not text:
            return None
        if text.isdecimal():
            return int(text)

        total = 0
        pending = 0  # digit waiting for its multiplier (or the final units)
        for ch in text:
            if ch in self._DIGITS:
                pending = self._DIGITS[ch]
            elif ch in self._UNITS:
                # A bare multiplier counts once: '十' is 10, '十三' is 13.
                total += (pending or 1) * self._UNITS[ch]
                pending = 0
        return total + pending

In [None]:
class Count:
    """Compute summary figures for a DataFrame of transactions.

    The frame is expected to provide the columns ``berth``,
    ``total price NTD`` and ``the berth total price NTD``.
    """

    def __init__(self, df):
        """Keep a reference to the DataFrame to summarise."""
        self.df = df

    def getTotalNumber(self):
        """Return the number of rows."""
        rowCount, _ = self.df.shape
        return rowCount

    def getTotalBerth(self):
        """Return the sum of the ``berth`` column."""
        berths = self.df['berth']
        return berths.sum()

    def getMeanPrice(self):
        """Return the mean of the ``total price NTD`` column."""
        prices = self.df['total price NTD']
        return prices.mean()

    def getTotalBerthPrice(self):
        """Return the sum of the ``the berth total price NTD`` column."""
        berthPrices = self.df['the berth total price NTD']
        return berthPrices.sum()

    def getMeanBerthPrice(self):
        """Return total berth price divided by total berth count."""
        priceSum = self.getTotalBerthPrice()
        berthSum = self.getTotalBerth()
        return priceSum / berthSum

    def getSummary(self):
        """Return the four figures as a two-column (label, value) DataFrame."""
        labels = ("總件數", "總車位數", "平均總價元", "平均車位總價元")
        values = (
            self.getTotalNumber(),
            self.getTotalBerth(),
            self.getMeanPrice(),
            self.getMeanBerthPrice(),
        )
        rows = list(zip(labels, values))
        return pd.DataFrame(rows, columns=['統計', '值'])

    def saveSummaryToCsv(self, path):
        """Write the summary table to ``<path>/Output/count.csv``."""
        target = os.path.join(path, 'Output', 'count.csv')
        self.getSummary().to_csv(target)


In [None]:
# Mount Google Drive at /content/gdrive so the project folder can be read and
# written. `google.colab` is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# End-to-end run: load every CSV, derive the helper columns, export the
# filtered rows, then print and export the summary statistics.
# The project folder is addressed by absolute path (Drive is mounted at
# /content/gdrive) so the run does not depend on the current directory.
path = '/content/gdrive/My Drive/Cathay'
data = DataframeAll(path)
df_all = data.readAllData()
# Named `land_filter` rather than `filter` to avoid shadowing the builtin.
land_filter = Filter(df_all)
df_filter = land_filter.getFilter()
land_filter.saveFilterToCsv(path)
count = Count(df_filter)
print(count.getSummary())
count.saveSummaryToCsv(path)

        統計             值
0      總件數  6.961200e+04
1     總車位數  5.767200e+04
2    平均總價元  1.475090e+07
3  平均車位總價元  9.232892e+05
