In [1]:
import numpy as np
import pandas as pd
import time
import math

In [2]:
# Configuration for the synthetic regression-data generator below.
#
# The *Min/*Max integer pairs are passed to np.random.randint as (low, high),
# so the upper value of each pair is exclusive.

# Number of features drawn when the caller does not specify one.
featuresMin = 2
featuresMax = 11
# Number of samples (rows) drawn when the caller does not specify one.
samplesMin = 1000
samplesMax = 30000
# Exponent applied to each feature when non-linear metadata is requested.
powerMin = 1
powerMax = 5
# Range that np.random.rand() is scaled into to obtain a feature coefficient.
coefficientMin = -200
coefficientMax = 200
# Range of the constant (bias) term.
biasMin = -200
biasMax = 200
# Candidate endpoints; two are picked at random per feature to form the
# interval the feature's values are sampled from.
scopes = [-0.01, -0.1, -1, -2, -5, -10, -20, -50, -80, -100, -150, -200, 0.01, 0.1, 1, 2, 5, 10, 20, 50, 80, 100, 150, 200]
# Seed NumPy's global RNG with the current time (whole seconds) so that each
# run produces different data.
np.random.seed(int(time.time()))

In [3]:
def generate_metadata(features=None, linear=True):
    """Randomly describe a target function ``y = bias + sum(c_i * X_i ** p_i)``.

    Args:
        features: Number of features to describe. When falsy (``None`` or 0)
            a count is drawn with ``np.random.randint(featuresMin, featuresMax)``.
        linear: When true every feature gets power 1; otherwise the power is
            drawn with ``np.random.randint(powerMin, powerMax)``.

    Returns:
        A dict with keys ``'bias'`` (int) and ``'featureInfos'`` (list of
        dicts with keys ``'min'``, ``'max'``, ``'power'``, ``'coefficient'``).
    """
    feature_count = features or np.random.randint(featuresMin, featuresMax)
    bias = np.random.randint(biasMin, biasMax)
    coefficient_span = coefficientMax - coefficientMin

    feature_infos = []
    for _ in range(feature_count):
        # The draw order (two scopes, coefficient, optional power) is fixed so
        # that a given RNG state always yields the same metadata.
        first = np.random.choice(scopes)
        second = np.random.choice(scopes)
        lower, upper = sorted((first, second))

        coefficient = round(np.random.rand() * coefficient_span + coefficientMin, 2)

        if linear:
            power = 1
        else:
            power = np.random.randint(powerMin, powerMax)

        feature_infos.append({
            'min': lower,
            # +1 keeps the interval non-empty when both draws are equal.
            'max': upper + 1,
            'power': power,
            'coefficient': coefficient,
        })

    return {'bias': bias, 'featureInfos': feature_infos}


def compute_y(metadata, X):
    """Evaluate the function described by ``metadata`` on every row of ``X``.

    Args:
        metadata: Dict with a ``'bias'`` value and a ``'featureInfos'`` list
            whose i-th entry provides ``'coefficient'`` and ``'power'`` for
            column i of ``X``.
        X: 2-D array indexed as ``X[:, i]``, one column per feature.

    Returns:
        1-D float array of length ``X.shape[0]`` holding
        ``bias + sum(coefficient_i * X[:, i] ** power_i)``.
    """
    totals = np.full(X.shape[0], metadata['bias'], dtype=float)
    for column, info in enumerate(metadata['featureInfos']):
        term = np.power(X[:, column], info['power'])
        totals += info['coefficient'] * term
    return totals


def write_file(X, y, file):
    """Write features and target to ``file`` as CSV with a header row.

    Args:
        X: 2-D array of shape ``(rows, cols)``; its columns are written as
            ``X1`` .. ``X<cols>``.
        y: Array with ``rows`` elements; written as the final column ``Y``.
        file: Path or writable text buffer, passed to ``DataFrame.to_csv``.
    """
    n_rows, n_cols = X.shape

    # Pass the shape positionally: the ``newshape=`` keyword of np.reshape is
    # deprecated in recent NumPy releases, while the positional form works on
    # every version.
    target_column = np.reshape(y, (n_rows, 1))
    table = np.append(X, target_column, axis=1)

    columns = ['X{0}'.format(i + 1) for i in range(n_cols)]
    columns.append('Y')
    pd.DataFrame(table, columns=columns).to_csv(file, index=False)


def generate_data(metadata=None, features=None, linear=True, samples=None, file=None):
    """Generate a random feature matrix and its target values.

    Args:
        metadata: Function description as produced by ``generate_metadata``.
            When falsy, one is generated from ``features`` and ``linear``.
        features: Feature count used only when ``metadata`` has to be
            generated; ignored when ``metadata`` is supplied, because the
            count is then taken from ``metadata['featureInfos']``.
        linear: Forwarded to ``generate_metadata`` when metadata is generated.
        samples: Number of rows. When falsy, drawn with
            ``np.random.randint(samplesMin, samplesMax)``.
        file: Optional CSV destination; when truthy the data is written
            with ``write_file``.

    Returns:
        Tuple ``(X, y, metadata)`` where ``X`` has shape
        ``(samples, n_features)`` and ``y`` has length ``samples``.
    """
    metadata = metadata or generate_metadata(features, linear)
    samples = samples or np.random.randint(samplesMin, samplesMax)
    feature_infos = metadata['featureInfos']

    # np.empty is the documented way to allocate an uninitialised array;
    # every column is filled below.
    X = np.empty((samples, len(feature_infos)))
    for column, info in enumerate(feature_infos):
        # Named low/high rather than min/max to avoid shadowing the builtins.
        low = info['min']
        high = info['max']
        # Columns are drawn one at a time to keep the RNG draw order stable.
        X[:, column] = np.random.random_sample((samples, )) * (high - low) + low

    y = compute_y(metadata, X)

    if file:
        write_file(X, y, file)

    return (X, y, metadata)


def generate_function(metadata):
    """Render ``metadata`` as a human-readable formula string.

    Args:
        metadata: Dict with a ``'bias'`` value and a ``'featureInfos'`` list
            whose entries provide ``'coefficient'`` and ``'power'``.

    Returns:
        A string such as ``'y = 14 + (3.5)*X1^1 + (-8.1)*X2^1'``; features
        are numbered from 1 in list order.
    """
    pieces = ['y = {0}'.format(metadata['bias'])]
    for number, info in enumerate(metadata['featureInfos'], start=1):
        pieces.append('({0})*X{1}^{2}'.format(info['coefficient'], number, info['power']))
    return ' + '.join(pieces)
    

# Hand-written metadata for a fixed linear function of two features:
#   y = 14 + 3.5 * X1 - 8.1 * X2
# with X1 sampled from [-2, 2) and X2 from [-100, 100).
metadata = {
    'bias': 14,
    'featureInfos': [
        {
            'min': -2,
            'max': 2,
            'coefficient': 3.5,
            'power': 1
        },
        {
            'min': -100,
            'max': 100,
            'coefficient': -8.1,
            'power': 1
        }
    ]
}

# Generate 500 samples and write them to 'data2.csv'.
# NOTE: features=6 has no effect here; generate_data takes the feature count
# from the supplied metadata (two features) whenever metadata is given.
(X, y, metadata) = generate_data(metadata=metadata, features=6, samples=500, file='data2.csv')
# print(X[: 5, :])
# print(y[: 5])
# Print the formula the data was generated from.
print(generate_function(metadata))

y = 14 + (3.5)*X1^1 + (-8.1)*X2^1
