In [1]:

import pandas as pd

### Synchronized function

In [14]:
import threading
import time

# Single module-level lock shared by every thread that runs ``synchronized``.
lock = threading.Lock()


def synchronized(delay=3):
    """Hold the shared ``lock`` for ``delay`` seconds, then print the current time.

    Only one thread at a time can be inside the locked section, so concurrent
    callers print their timestamps at least ``delay`` seconds apart.

    Args:
        delay: Seconds to sleep while holding the lock (default 3).
    """
    # Local imports: this cell never imports ``datetime``, and a later cell
    # (``from datetime import datetime, time, timedelta``) rebinds the global
    # name ``time`` to ``datetime.time``, which has no ``sleep``.
    import time
    from datetime import datetime

    # ``with`` releases the lock even if the body raises; a bare
    # acquire()/release() pair would leave it held forever in that case.
    with lock:
        time.sleep(delay)
        print(datetime.now().time())


# Two threads run the same function; the shared lock lets only one of them
# into the locked section at a time.
t1 = threading.Thread(target=synchronized)
t2 = threading.Thread(target=synchronized)

t1.start()
t2.start()

# Wait for both workers so their output appears under this cell and no thread
# is still running while later cells execute.
t1.join()
t2.join()



# OOP

### Static

In [8]:
class A:
    """Demo of class-level ("static") state that is shared through the class."""

    # Class attribute: stored on the class object, not on instances.
    static_field = "a"

    @classmethod
    def static_fkt(cls):
        """Print the class attribute, append "a" to it, then print it again."""
        before = cls.static_field
        print(before)
        cls.static_field = before + "a"
        print(cls.static_field)


A.static_fkt()

### Factory method

More on this: https://iscinumpy.gitlab.io/post/factory-classmethods-in-python/

In [11]:
class A(object):  # inherits from object
    """Minimal class showing a factory method implemented as a classmethod."""

    @classmethod
    def get_new_inst(cls, x):
        """Factory: build and return a new instance of ``cls`` holding ``x``."""
        return cls(x)

    def __init__(self, x):
        self.x = x

    def __str__(self):
        # Wrap the value in a 1-tuple: a bare ``% self.x`` raises TypeError
        # when x is itself a tuple, because ``%`` treats a tuple operand as
        # the full argument list.
        return 'A[x=%s]' % (self.x,)


print(A.get_new_inst(1))


### Inheritance

In [21]:
class A:
    """Base class: stores one value and knows how to greet."""

    def __init__(self, a):
        self.a = a

    def say_hi(self):
        """Print a greeting."""
        print('hi')


class B(A):
    """Subclass of A that adds a second value ``b``."""

    def __init__(self, a, b):
        # Let the parent initialiser set ``a`` before adding our own attribute.
        super().__init__(a)
        self.b = b

    def __str__(self):
        return 'B [%s, %s]' % (self.a, self.b)

    def call_super_method(self):
        """Call the inherited ``say_hi`` twice: via ``super()`` and via ``self``."""
        super().say_hi()
        self.say_hi()

    def set_super_attr(self):
        """Overwrite the attribute that the parent initialiser set."""
        # Assign through ``self``, NOT ``super().a = 3``: the super() proxy
        # does not support attribute assignment.
        self.a = 3


b = B(1, 2)
print(b)
b.call_super_method()
b.set_super_attr()
print(b)

In [13]:
# hasattr() looks the attribute up by name at runtime; the message is printed
# only when ``b`` exposes an attribute called ``a``.
if hasattr(b, 'a'):
    print('b has the property a, value = %s' % b.a)

In [16]:
# isinstance() also accepts subclasses: an instance of B counts as an A ...
if isinstance(b, A):
    print('b is an A')

# ... but not the other way round: a plain A instance is not a B.
a = A(3)
if not isinstance(a, B):
    print('a is not a B')


In [4]:
class A:
    """Iterable wrapper around a fixed list of four integers."""

    def __init__(self):
        self.lst = list(range(1, 5))

    def __iter__(self):
        # Delegate iteration to the underlying list.
        return self.lst.__iter__()


a = A()
for i in a:
    print(i)


# Random

In [None]:

import numpy as np


def random_date(start, end, n=1):
    """Generate random datetime(s) at whole-day offsets in ``[start, end)``.

    Args:
        start: Earliest possible date as a ``'%Y-%m-%d'`` string (inclusive).
        end: Upper bound as a ``'%Y-%m-%d'`` string (exclusive).
        n: Number of dates to draw.

    Returns:
        A single ``datetime`` when ``n == 1``, otherwise a list of ``n``
        ``datetime`` objects.

    Raises:
        ValueError: If a date string does not match the format, or if ``end``
            is not at least one day after ``start``.
    """
    # Local import: the notebook only imports these names in a later cell, so
    # relying on the globals fails on a fresh kernel.
    from datetime import datetime, timedelta

    first = datetime.strptime(start, '%Y-%m-%d')
    span_days = (datetime.strptime(end, '%Y-%m-%d') - first).days
    if span_days < 1:
        raise ValueError('end must be at least one day after start')

    def draw():
        # randint's upper bound is exclusive, so ``end`` itself is never drawn.
        return first + timedelta(days=int(np.random.randint(0, span_days)))

    if n == 1:
        return draw()
    return [draw() for _ in range(n)]


random_date('2019-01-01', '2019-01-03', n=2)

# Data

In [6]:
# Build a small two-column frame from plain Python lists.
df = pd.DataFrame(
    data={
        'col 1': [1, 3, 4, 5, 6, 7, 8],
        'col 2': [4, 5, 6, 7, 8, 9, 0],
    },
)
df.head()

In [10]:
# Same frame as before, but each column is supplied as a NumPy array.
df = pd.DataFrame(
    data={
        'col 1': np.array([1, 3, 4, 5, 6, 7, 8]),
        'col 2': np.array([4, 5, 6, 7, 8, 9, 0]),
    },
)
df.head()

In [11]:
# Column by position: ``iloc`` is purely positional, so ``[:, 0]`` selects all
# rows of the first column regardless of its label.
df.iloc[:, 0]

In [14]:
# Sort by year, then by month within each year. sort_values() returns a new
# frame, so the result is bound directly instead of mutating with inplace=True.
df = pd.DataFrame(
    {'Year': [21, 21, 19, 19, 20, 20], 'Month': [6, 1, 6, 1, 6, 1]}
).sort_values(['Year', 'Month'])
df.head(10)

## Conditions & filter

In [16]:
import numpy as np
import pandas as pd

# Five days for each of two keys; the fifth day of each key has a missing key.
df = pd.DataFrame({
    'Day': [1, 2, 3, 4, 5] * 2,
    'Key': [1] * 4 + [np.nan] + [2] * 4 + [np.nan],
})
df.head(10)

Filter out days 1–3 for key 2 (rows with any other key are kept)

In [17]:
# Keep a row if its key is not 2, or if its day is outside {1, 2, 3}; only
# rows with key 2 AND day 1-3 are removed. NaN keys compare as "!= 2", so
# those rows are kept as well.
df[(df['Key'] != 2) | (~df['Day'].isin([1, 2, 3]))]

Drop rows containing NaN

In [20]:
# dropna() returns a new frame without the rows that contain NaN; ``df``
# itself is not modified because the result is not assigned.
df.dropna()

In [22]:
# Rows whose key is NaN are kept too, because NaN != 1.0 evaluates to True.
df[df.Key != 1.0]

## Write to Excel

### openpyxl

openpyxl makes unexpected changes to the file.

In [3]:
import pandas as pd
import os.path
from openpyxl import load_workbook


def create_or_append_to_excel_sheet(df, path, sheet_name):
    """Write ``df`` to an Excel sheet, appending below existing rows if any.

    If ``path`` does not exist, a new workbook is created with ``df`` (header
    included) on ``sheet_name``. Otherwise the workbook is opened in append
    mode: if ``sheet_name`` already exists the rows of ``df`` are written
    without a header directly below the last used row; if it does not exist
    yet, the sheet is created with a header row.

    Requires pandas >= 1.4 (``if_sheet_exists='overlay'``) and openpyxl.

    Args:
        df: DataFrame to write; its index is not written.
        path: Path of the ``.xlsx`` file.
        sheet_name: Name of the target worksheet.
    """
    if not os.path.isfile(path):
        print('create new file')
        df.to_excel(path, sheet_name=sheet_name, startrow=0, index=False)
        return

    print('append to existing file')
    # mode='a' makes pandas load the existing workbook itself, replacing the
    # old ``writer.book = ...`` / ``writer.sheets = ...`` / ``writer.save()``
    # idiom that current pandas no longer supports. 'overlay' writes into the
    # existing sheet instead of replacing or duplicating it, and the ``with``
    # block saves and closes the file even if writing fails.
    with pd.ExcelWriter(path, engine='openpyxl', mode='a',
                        if_sheet_exists='overlay') as writer:
        sheet = writer.sheets.get(sheet_name)
        if sheet is None:
            # Sheet not in the workbook yet: start it with a header row.
            df.to_excel(writer, sheet_name=sheet_name, startrow=0, index=False)
        else:
            # max_row is the 1-based number of the last used row, which is
            # also the 0-based index of the first free row.
            df.to_excel(writer, sheet_name=sheet_name, header=False,
                        startrow=sheet.max_row, index=False)


# Demo input: four rows, written to a workbook next to the notebook.
df = pd.DataFrame({'a': [1, 2, 3, 4], 'b': [4, 5, 6, 7]})
path = './test.xlsx'
sheet_name = 'Sheet 1'

# The first run creates the file; running this cell again appends the same
# four rows, because the function appends whenever the file already exists.
create_or_append_to_excel_sheet(df, path, sheet_name)

In [4]:
import os

# Show the current working directory (where ./test.xlsx was written). A bare
# ``pwd`` relies on IPython's automagic and is not valid as plain Python.
os.getcwd()

This somehow corrupts the Excel file: the next time pandas reads it, it tries to read formulas instead of values, and those cells become NaN.

# Dates

In [7]:
from datetime import datetime, time, timedelta

# Parse a string into a datetime, then format it back with the same pattern.
dt = datetime.strptime('2010-10-01 12:12', '%Y-%m-%d %H:%M')
dt.strftime('%Y-%m-%d %H:%M')

In [8]:
import humanize

# A negative timedelta is normalised to negative days plus a positive
# remainder, so str() shows -500 hours as "-21 days, 4:00:00".
delta = timedelta(hours=-500)
print(str(delta))
# NOTE(review): humanize presumably renders a negative delta as a time in the
# future ("... from now") - confirm against the humanize documentation.
print(humanize.naturaltime(delta))


In [17]:
import math

def hours_to_time(hours):
    """Convert a fractional hour count (e.g. 2.5) into a ``datetime.time``.

    The value is rounded to the nearest whole minute first and then split into
    hours and minutes, so a fraction that rounds up to 60 minutes carries into
    the next hour instead of producing the invalid ``minute=60``.

    Args:
        hours: Time of day in hours, e.g. 2.5 for 02:30.

    Returns:
        The corresponding ``datetime.time`` with second and microsecond 0.

    Raises:
        ValueError: If the rounded value falls outside 00:00-23:59 (raised by
            the ``time`` constructor).
    """
    # Local import so the helper does not depend on which ``time`` (module or
    # class) the global name currently refers to.
    from datetime import time

    whole_hours, minutes = divmod(round(hours * 60), 60)
    return time(hour=whole_hours, minute=minutes)


a = 2.5
time_ = hours_to_time(a)
print(time_)

In [20]:
import pandas as pd

# Start from a pandas Timestamp and derive the standard-library equivalents:
# a date (time of day dropped) and a full datetime.
t = pd.Timestamp('2013-12-25 00:01:01')
d = t.date()
dt = t.to_pydatetime()

# Show each value next to its type.
print(t, type(t))
print(d, type(d))
print(dt, type(dt))

# Profiling

In [3]:
import gc
import sys
import _pickle as cPickle


def memory_dump(path="memory.pickle"):
    """Pickle a summary of every object currently tracked by the garbage collector.

    The file receives a list with one dict per object returned by
    ``gc.get_objects()``. Each dict has the keys ``'id'`` (``id(obj)``),
    ``'class'`` (``str(obj.__class__)``), ``'size'`` (``sys.getsizeof(obj, 0)``)
    and ``'referents'`` (ids of the objects it directly refers to).

    Args:
        path: Destination file; defaults to ``memory.pickle`` in the current
            working directory.
    """
    records = []
    for obj in gc.get_objects():
        if not hasattr(obj, '__class__'):
            continue
        records.append({
            'id': id(obj),
            'class': str(obj.__class__),
            # The second argument is the fallback for objects that do not
            # report a size.
            'size': sys.getsizeof(obj, 0),
            'referents': [id(o) for o in gc.get_referents(obj)
                          if hasattr(o, '__class__')],
        })
    # Open the file only once collection succeeded, so a failure above cannot
    # leave an empty or truncated dump behind.
    with open(path, 'wb') as dump:
        cPickle.dump(records, dump)


# Writes the snapshot to "memory.pickle" in the current working directory.
memory_dump()

In [8]:
# Read the dump back and show its first ten records.
# NOTE: unpickling can execute arbitrary code - only load files that this
# notebook produced itself.
with open("memory.pickle", 'rb') as dump:
    objs = cPickle.load(dump)
for i in objs[0:10]:
    print(i)