# In [None]:
import pandas as pd
import numpy as np
import statistics 
import pandasql as ps


class CombiningDataSet:
    """Convenience wrapper bundling common pandas operations on stored frames.

    Four DataFrames are stored as ``df1`` .. ``df4``. Most helpers operate on
    ``df1`` (unary operations) or on ``df1`` and ``df2`` (binary operations);
    ``df3`` and ``df4`` are only stored and exposed as tables to ``query``.
    """

    def __init__(self, df1, df2, df3, df4):
        self.df1 = df1
        self.df2 = df2
        self.df3 = df3
        self.df4 = df4

    # ------------------------------------------------------------------
    # Merging / concatenating
    # ------------------------------------------------------------------
    def mergeOneToOne(self):
        """Merge ``df1`` and ``df2`` with ``pd.merge`` defaults."""
        return pd.merge(self.df1, self.df2)

    def mergeOnJoin(self, columns, join="outer"):
        """Merge ``df1`` and ``df2`` on ``columns``.

        ``join`` is forwarded as ``how``: one of ``inner``, ``outer``,
        ``left`` or ``right``.
        """
        return pd.merge(self.df1, self.df2, how=join, on=columns)

    def mergeOnIndex(self):
        """Merge ``df1`` and ``df2`` using the index of both frames as key."""
        return pd.merge(self.df1, self.df2, left_index=True, right_index=True)

    def mergeBySuffix(self, column, suffixes=("_L", "_R")):
        """Merge ``df1`` and ``df2`` on ``column``, disambiguating name clashes.

        Conflicting column names from the left / right frame receive the
        respective entry of ``suffixes``. The default is an immutable tuple so
        it cannot be shared and mutated across calls.
        """
        return pd.merge(self.df1, self.df2, on=column, suffixes=suffixes)

    def concat(self, frames, axis=0, join="outer", ignore_index=False,
               keys=None, sort=True):
        """Concatenate ``frames`` with ``pd.concat``.

        axis: {0, 1, ...}, default 0.
        join: {'inner', 'outer'}, default 'outer'.
        ignore_index: if True, do not use the index values on the
            concatenation axis.
        keys, sort: forwarded unchanged to ``pd.concat``.
        """
        return pd.concat(frames, axis=axis, join=join,
                         ignore_index=ignore_index, keys=keys, sort=sort)

    def appendRow(self, row, ignore_index=True):
        """Return a new frame consisting of ``df1`` followed by ``row``.

        ``row`` may be a dict, a Series, a DataFrame, or a list of those.
        ``df1`` itself is not modified.

        Implemented with ``pd.concat`` because ``DataFrame.append`` was
        removed in pandas 2.0. ``ignore_index`` is now honoured (it used to be
        silently forced to True).
        """
        if isinstance(row, dict):
            extra = [pd.DataFrame([row])]
        elif isinstance(row, pd.Series):
            # A Series is one row: its index becomes the column labels.
            extra = [row.to_frame().T.infer_objects()]
        elif isinstance(row, list):
            if row and not isinstance(row[0], pd.DataFrame):
                # List of dicts / Series / sequences -> one frame of rows.
                extra = [pd.DataFrame(row)]
            else:
                extra = list(row)
        else:
            extra = [row]
        return pd.concat([self.df1, *extra], ignore_index=ignore_index)

    # ------------------------------------------------------------------
    # Grouping
    # ------------------------------------------------------------------
    def groupBy(self, columns):
        """Group ``df1`` by ``columns`` and return the GroupBy object."""
        return self.df1.groupby(columns)

    def groupByFilter(self, groupedDT, column, value):
        """Keep only the groups whose mean of ``column`` is greater than ``value``."""
        return groupedDT.filter(lambda group: group[column].mean() > value)

    def aggrGroupedDT(self, groupedData):
        """Aggregate grouped data with sum, mean, std, max, min, median, var and count."""
        return groupedData.agg([np.sum, np.mean, np.std, np.max, np.min,
                                np.median, np.var, "count"])

    def _groupByLevel(self, level):
        """Group ``df1`` by the given index level (key or name)."""
        return self.df1.groupby(level=level)

    # Index level may be specified as keys or names
    def groupByLevelMean(self, level=0):
        """Mean of ``df1`` per group of index ``level``."""
        return self._groupByLevel(level).mean()

    def groupByLevelMin(self, level=0):
        """Minimum of ``df1`` per group of index ``level``."""
        return self._groupByLevel(level).min()

    def groupByLevelMax(self, level=0):
        """Maximum of ``df1`` per group of index ``level``."""
        return self._groupByLevel(level).max()

    def groupByLevelSum(self, level=0):
        """Sum of ``df1`` per group of index ``level``."""
        return self._groupByLevel(level).sum()

    def groupByLevelCount(self, level=0):
        """Count of non-null values of ``df1`` per group of index ``level``."""
        return self._groupByLevel(level).count()

    def groupByLevelMedian(self, level=0):
        """Median of ``df1`` per group of index ``level``."""
        return self._groupByLevel(level).median()

    # ------------------------------------------------------------------
    # Reshaping by stacking and unstacking
    # ------------------------------------------------------------------
    def stack(self):
        """Return ``df1.stack()``."""
        return self.df1.stack()

    def unstack(self):
        """Return ``df1.unstack()``."""
        return self.df1.unstack()

    # Transposition
    def transpose(self):
        """Return ``df1`` transposed."""
        return self.df1.transpose()

    # ------------------------------------------------------------------
    # Arithmetic operations
    # ------------------------------------------------------------------
    def add(self, value):
        """Return ``df1 + value`` (element-wise)."""
        return self.df1 + value

    def addDfToDt(self):
        """Return ``df1 + df2`` (element-wise, aligned on index and columns).

        The previous implementation multiplied the frames, contradicting the
        method name.
        """
        return self.df1 + self.df2

    def multiplyByValue(self, value):
        """Return ``df1 * value`` (element-wise)."""
        return self.df1 * value

    def extract(self, value):
        """Return ``value / df1`` (element-wise).

        NOTE(review): the name suggests something other than a reciprocal
        division; the computation is kept as-is - confirm intent with callers.
        """
        return value / self.df1

    def power(self, value):
        """Return ``df1 ** value`` (element-wise)."""
        return self.df1 ** value

    def operationsOnColumn(self, value, column, operation):
        """Apply ``column <operation> value`` to ``df1[column]`` in place.

        ``operation`` is one of ``"+"``, ``"-"``, ``"*"``, ``"/"``, ``"**"``.
        Any other string leaves ``df1`` untouched. ``df1`` is mutated and
        returned.
        """
        operations = {
            "+": lambda series: series + value,
            "-": lambda series: series - value,
            "*": lambda series: series * value,
            "/": lambda series: series / value,
            "**": lambda series: series ** value,
        }
        apply = operations.get(operation)
        if apply is not None:
            self.df1[column] = apply(self.df1[column])
        return self.df1

    # ------------------------------------------------------------------
    # SQL helpers
    # ------------------------------------------------------------------
    def query(self, q):
        """Run SQL query ``q`` through pandasql against the stored frames.

        The stored frames are exposed under the table names ``df1``, ``df2``,
        ``df3`` and ``df4`` (frames that are None are skipped). Previously
        ``locals()`` was passed, which only contained ``self`` and ``q``, so
        none of the frames could be referenced from the query.
        """
        candidates = {
            "df1": self.df1,
            "df2": self.df2,
            "df3": self.df3,
            "df4": self.df4,
        }
        env = {name: frame for name, frame in candidates.items()
               if frame is not None}
        return ps.sqldf(q, env)

    def querySQL(self, q, con):
        """Run SQL query ``q`` on connection ``con`` via ``pd.read_sql_query``."""
        return pd.read_sql_query(q, con)
