In [None]:
from __future__ import print_function, division
import sys
sys.path.append('/home/ego/Github/david/')

import os
from os.path import exists, join, isfile

import dataset
import pandas as pd

In [None]:
from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union
class Substitution:
    """Decorator that %-formats a function's docstring with stored parameters.

    The decorator is robust even if ``func.__doc__`` is None (for example,
    if -OO was passed to the interpreter): such docstrings are left untouched.

    Usage: construct a ``Substitution`` with either positional arguments or
    keyword arguments suitable for ``%`` substitution (never both), then
    decorate a function with the constructed object, e.g.

    >>> sub_author_name = Substitution(author='Jason')
    >>> @sub_author_name
    ... def some_function(x):
    ...     "%(author)s wrote this function"
    >>> some_function.__doc__
    'Jason wrote this function'

    Positional arguments work as well:

    >>> sub_first_last_names = Substitution('Edgar Allen', 'Poe')
    >>> @sub_first_last_names
    ... def some_function(x):
    ...     "%s %s wrote the Raven"
    >>> some_function.__doc__
    'Edgar Allen Poe wrote the Raven'
    """

    def __init__(self, *args, **kwargs):
        """Store the substitution parameters.

        Raises:
            AssertionError: if both positional and keyword arguments are given.
        """
        if args and kwargs:
            raise AssertionError("Only positional or keyword args are allowed")
        # Whichever of (args | kwargs) is non-empty becomes the parameters;
        # with no arguments at all this is the (empty) kwargs dict.
        self.params = args or kwargs

    def __call__(self, func: Callable) -> Callable:
        """Format ``func.__doc__`` in place with ``self.params`` and return ``func``."""
        # ``and`` short-circuits so a missing (None) or empty docstring is kept as is.
        func.__doc__ = func.__doc__ and func.__doc__ % self.params
        return func

    def update(self, *args, **kwargs) -> None:
        """Update ``self.params`` with the supplied args, like ``dict.update``.

        Only meaningful when the instance was built from keyword arguments,
        i.e. when ``self.params`` is a dict.

        Raises:
            TypeError: if the instance was built from positional arguments
                (``self.params`` is a tuple and cannot be updated by key).
        """
        if not isinstance(self.params, dict):
            raise TypeError(
                "update() needs keyword-based params; this Substitution "
                "was created from positional arguments"
            )
        self.params.update(*args, **kwargs)

In [None]:
# Keyword arguments are stored as the substitution dict used for
# %(name)s-style placeholders in decorated docstrings.
sub_author_name = Substitution(
    author='Carlos',
    age=27,
    specs='Information Systems',
)
sub_author_name.params

In [None]:
# Decorating formats the docstring template with sub_author_name.params,
# filling the %(author)s placeholder.
@sub_author_name
def func(x):
    """%(author)s function"""


func.__doc__

In [None]:
import numpy as np

# Synthetic table: N_COLUMNS integer columns of N_ROWS values drawn from [0, 1000).
# A dedicated, seeded generator makes the cell reproducible on every re-run
# without touching NumPy's global random state.
DATA_SEED = 0
N_COLUMNS = 100
N_ROWS = 30000

data_rng = np.random.RandomState(DATA_SEED)
data = {
    "col_{0:02d}".format(i): data_rng.randint(0, high=1000, size=N_ROWS)
    for i in range(N_COLUMNS)
}
# Preview the first values of a few columns instead of dumping every array.
{name: values[:5] for name, values in list(data.items())[:3]}

In [None]:
class DFClassGenerator:
    '''Generate source code for a ``pd.DataFrame`` subclass from a frame's columns.

    The generated class has one upper-cased class attribute per column
    (holding the column label as a string) and a ``_constructor`` property
    returning the generated class itself.

    Column labels must be strings that are valid Python identifiers once
    upper-cased; no sanitising is performed.

    Example usage of a class generated under the name ``CommentsFrame``:

        comment = CommentsFrame(data=dict(
            author=['carlos', 'chucho'],
            cid=[122, 177],
            text=['this is carlos', 'chucho here'],
            time=['10:30', '19:40']))
        type(comment)
    '''
    CLASS_HEADER = 'class {class_name}(pd.DataFrame):'
    COLUMNS = '    {var} = "{label}"'   # the 4-space indent is hard-coded here, for demo

    CONSTRUCTOR = ("    @property\n"
                   "    def _constructor(self):\n"
                   "        return {class_name}")

    @classmethod
    def generate_class(cls, df, class_name, echo=True):
        '''Build the source code of a DataFrame subclass named ``class_name``.

        Args:
            df: object whose ``columns`` attribute iterates over string
                column labels (works for a single-level column index).
            class_name: name of the class to generate.
            echo: when True (default) the source is also printed, which
                was the only effect before the source was returned.

        Returns:
            The generated source code as a single string.
        '''
        header = cls.CLASS_HEADER.format(class_name=class_name)
        attributes = [cls.COLUMNS.format(var=label.upper(), label=label)
                      for label in df.columns]
        constructor = cls.CONSTRUCTOR.format(class_name=class_name)
        # A blank line separates the column attributes from the property.
        source_code = '\n'.join([header] + attributes) + '\n\n' + constructor
        if echo:
            print(source_code)
        return source_code
# Example (``metric`` is created in a later cell):
#source_code = DFClassGenerator.generate_class(metric, 'CommentsFrame', echo=False)
#print(source_code)

In [10]:
import pandas as pd

class DavidFrame(pd.DataFrame):
    """``pd.DataFrame`` subclass that traces pandas' subclassing hooks.

    ``_constructor`` and ``__finalize__`` print a marker every time pandas
    calls them, so a cell's output shows when derived frames are created.
    The custom attribute ``test`` is registered in ``_metadata`` so that it
    is carried over to frames derived from an instance.
    """

    # pandas expects ``_metadata`` to be a class-level list with the *names*
    # of the custom attributes to propagate (not a dict of name -> value).
    _metadata = ['test']
    # Default value of the custom ``test`` attribute.
    test = 'TEST'

    def __init__(self, *args, **kwargs):
        """Accept exactly the arguments of ``pd.DataFrame``."""
        super(DavidFrame, self).__init__(*args, **kwargs)

    def to_textfile(self, fn, text_col='text'):
        """Write the non-empty entries of ``text_col`` to ``fn``, one per line.

        Args:
            fn: path of the UTF-8 text file to create or overwrite.
            text_col: name of the column holding the texts.

        Missing entries (None / NaN) and empty entries are skipped.
        """
        # The ``with`` block closes the file; no explicit close() is needed.
        with open(fn, 'w', encoding='utf-8') as f:
            for value in self[text_col].tolist():
                # Missing values have no len() and carry no text.
                if pd.api.types.is_scalar(value) and pd.isna(value):
                    continue
                if len(value) != 0:
                    f.write('%s\n' % value)

    @property
    def _constructor(self):
        """Class pandas uses to build frames derived from this one."""
        print("< _constructor > :: called")
        return DavidFrame

    def __finalize__(self, other, method=None, **kwargs):
        '''Propagate metadata from other to self.

        Delegates to pandas' implementation, which copies the attributes
        named in ``_metadata`` when ``other`` is a pandas object and leaves
        them alone otherwise.
        '''
        print("< __finalize__ > :: called")
        return super(DavidFrame, self).__finalize__(other, method=method, **kwargs)

    def clone_instance_obj(self):
        '''Clones a copy of the class instance object.

        Returns an independent deep copy of the frame as a ``DavidFrame``;
        ``copy`` goes through ``_constructor`` and ``__finalize__`` so the
        metadata is carried over.
        '''
        return self.copy(deep=True)

In [11]:
def builder(func, *pargs, **kwards):
    """Announce a call on stdout, then perform it.

    Args:
        func: the callable to invoke.
        *pargs: positional arguments forwarded to ``func``.
        **kwards: keyword arguments forwarded to ``func``.

    Returns:
        Whatever ``func(*pargs, **kwards)`` returns.
    """
    # functools.partial objects and callable instances have no __name__;
    # fall back to their repr instead of failing before the call is made.
    label = getattr(func, '__name__', None)
    if label is None:
        label = repr(func)
    print(f'calling: {label}')
    return func(*pargs, **kwards)

In [13]:
# NOTE(review): TextMetrics is defined in a later cell (In [18]); on a fresh
# kernel that cell has to run before this one or this line raises NameError.
metric = TextMetrics('downloads/4Dk3jOSbz_0.json')
# Rebuild the loaded frame as a DavidFrame from its dict representation.
david = DavidFrame(metric.to_dict(orient='dict'))

In [14]:
# Copying and then displaying the frame exercises DavidFrame's _constructor
# and __finalize__ hooks; the lines they print appear in this cell's output.
david_copy = david.copy()
david_copy

__finalize__ called
_constructor called
__finalize__ called
_constructor called
__finalize__ called
__finalize__ called
_constructor called
__finalize__ called
_constructor called
__finalize__ called
__finalize__ called


Unnamed: 0,author,cid,text,time
0,Kevin Collins,UgxTICnj2z9wfoG1IYl4AaABAg,2016 Election put business person in govermen...,2 months ago
1,Big AL,Ugz-mRikNSCJVUzjhNd4AaABAg,I was so proud of my country the land of the A...,3 months ago
2,Stephen Patrick,UgxiD8K6JRL3asYUVat4AaABAg,From some news blasts I've seen some of Donald...,3 months ago
3,Terry Clark,Ugw4z1qX__lmGTzkmpJ4AaABAg,Impeachment cannot come soon enough for the Li...,3 months ago (edited)
4,Adeline Yee,UgwT_9FSnmq4neWV6KJ4AaABAg,What has become of America the great! You have...,3 months ago
5,Zappa Woman,Ugy3d1aEab9lzQO5ohR4AaABAg,This reminds me of Animal Farm and how the rul...,3 months ago
6,ShadowFoxSF,UgyqXuY2YMlW3Psl8nV4AaABAg,Lies on top of obstruction on top of lies...\n...,3 months ago
7,Andy C,Ugy3wTkH6AeM22tafWR4AaABAg,Someone needs to make a meme with Sarah's face...,3 months ago
8,Lorenzo,UgyJR0ptpaeIZK8fIaN4AaABAg,If some people want something it does not mean...,3 months ago
9,Don't SSleep,UgxTxzTPRBSFOflSGat4AaABAg,"David, you're a bit left of where I'm at, but ...",3 months ago


In [18]:
import pandas as pd

class DavidDataFrameBase(pd.DataFrame):
    """Base ``pd.DataFrame`` subclass built from a single data argument.

    ``RECIPES`` and ``SESSIONS`` are class-level attributes initialised to
    None; nothing in this class reads or writes them.
    """
    RECIPES = SESSIONS = None

    def __init__(self, data_structure):
        """Initialise the frame from ``data_structure`` (anything ``pd.DataFrame`` accepts as data)."""
        super().__init__(data_structure)
        
class JsonDataFrame(DavidDataFrameBase):
    """Frame populated from a JSON-lines file.

    The file at ``corpus_path`` is parsed with ``pd.read_json(..., lines=True)``
    and the path is kept on the instance as ``CORPUS_PATH``.
    """
    CORPUS_PATH = None

    def __init__(self, corpus_path):
        """Read ``corpus_path`` (UTF-8, one JSON document per line) into the frame."""
        parsed = pd.read_json(corpus_path, encoding='utf-8', lines=True)
        super().__init__(parsed)
        # Shadows the class-level default with this instance's source path.
        self.CORPUS_PATH = corpus_path

    def get_corpus_path(self):
        """Return the path stored in ``CORPUS_PATH``."""
        return self.CORPUS_PATH
    
class TextMetrics(JsonDataFrame):
    """``JsonDataFrame`` that additionally exposes a tuple of sentiment labels.

    Subclasses may override ``SENTI_LABELS``; the override must be a tuple.
    """
    SENTI_LABELS = ('positive', 'negative', 'neutral')

    def __init__(self, corpus_path: str):
        """Load ``corpus_path`` after validating ``SENTI_LABELS``.

        Raises:
            TypeError: if ``SENTI_LABELS`` (possibly overridden by a
                subclass) is not a tuple.
        """
        # Validated here rather than in the class body: there the check ran
        # once against the literal tuple above, so it could never fail and
        # never saw a subclass override.
        if not isinstance(type(self).SENTI_LABELS, tuple):
            raise TypeError('you need to pass a tuple!')
        super().__init__(corpus_path)

    def get_labels(self):
        """Return the ``SENTI_LABELS`` tuple."""
        return self.SENTI_LABELS

In [19]:
# Load a JSON-lines corpus file into a TextMetrics frame and display it.
tm = TextMetrics('downloads/BmYZH7xt8sU.json')
tm

Unnamed: 0,author,cid,text,time
0,PNW Ryan,UgzaG3oJa98fF6qi32h4AaABAg,I always forget to like the vid :( remind us!!!:P,25 minutes ago
1,Brian McDonald,Ugwgnpcp2e1D3mWMX2p4AaABAg,Looked like there was oil coming down left sid...,2 hours ago
2,Lewis Harvey,UgzpJXjwsyxTe1CLe4R4AaABAg,i like videos if its something really really f...,5 hours ago
3,skip rose,UgwBlsLv64UrNOcONz54AaABAg,Pos cable to small of guage causing currant su...,12 hours ago
4,Price Check On VagiClean,UgzjoHUlBJ8PDM_I3Gx4AaABAg,GT3RS: One of the best track cars ever made. Y...,12 hours ago
5,KgreProductions,UgwuiJw9Ib_5cmuV2sV4AaABAg,I try and like every video I watch but sometim...,12 hours ago
6,Vaughnny McGuire,Ugwr2AqW7W25MSimF5h4AaABAg,I literally like every video right after I cli...,18 hours ago
7,Gavin Banton,UgyENK3lNOcDbfNpMRl4AaABAg,I like for the mustache Adam gotta love it 😂😂,20 hours ago
8,Mike Scott,UgyCYyRJEIcU6NNK7rx4AaABAg,I like videos of content creators I truly supp...,1 day ago
9,rdub202,UgwB0Lo4qgBOw83utpJ4AaABAg,There are certain channels I follow that get a...,1 day ago


In [20]:
# get_labels() is defined on the TextMetrics class and returns its SENTI_LABELS tuple.
tm.get_labels()

('positive', 'negative', 'neutral')

In [21]:
# get_corpus_path() is inherited from the JsonDataFrame class and returns
# the path that was passed to the constructor.
tm.get_corpus_path()

'downloads/BmYZH7xt8sU.json'