In [1]:
# --- Standard library -------------------------------------------------------
import glob
import os
import warnings

# Cap OpenMP at one thread per process. This assignment has to run BEFORE
# numpy (or anything that pulls numpy in) is imported: native OpenMP/BLAS
# runtimes typically read the variable once, when the library is loaded, so
# setting it after the import can be silently ignored.
# NOTE(review): presumably this is to avoid oversubscribing cores while dask
# runs tasks in parallel - confirm.
os.environ["OMP_NUM_THREADS"] = "1"

# Suppress every warning for the rest of the session. This keeps the output
# short but also hides deprecation and numerical warnings from the libraries
# imported below.
warnings.filterwarnings('ignore')

# --- Numerical / dataframe stack -------------------------------------------
import numpy as np
import pandas
import dask
import dask.array as da
import dask.dataframe as dd
from dask import delayed

# --- Plotting ---------------------------------------------------------------
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# --- Machine learning -------------------------------------------------------
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from dask_ml.decomposition import PCA, IncrementalPCA
from dask_ml.preprocessing import PolynomialFeatures
from dask_ml.model_selection import train_test_split
from dask_ml.model_selection import GridSearchCV

# --- Materials-knowledge-system helpers ------------------------------------
from pymks import (
    generate_multiphase,
    solve_fe,
    plot_microstructures,
    PrimitiveTransformer,
    TwoPointCorrelation,
    FlattenTransformer
)


In [2]:
from pymks import GenericTransformer

In [3]:
# Open the feature store and the target store (paths relative to the
# notebook's working directory) as dask arrays, in that order.
x_data, y_data = (
    da.from_zarr(store) for store in ('output.zarr', 'output_y.zarr')
)

In [4]:
# Display the feature array's summary: total/chunk bytes, shape, task count and dtype.
x_data

Unnamed: 0,Array,Chunk
Bytes,9.44 GB,106.12 MB
Shape,"(8900, 132651)","(100, 132651)"
Count,90 Tasks,89 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 9.44 GB 106.12 MB Shape (8900, 132651) (100, 132651) Count 90 Tasks 89 Chunks Type float64 numpy.ndarray",132651  8900,

Unnamed: 0,Array,Chunk
Bytes,9.44 GB,106.12 MB
Shape,"(8900, 132651)","(100, 132651)"
Count,90 Tasks,89 Chunks
Type,float64,numpy.ndarray


In [5]:
# Display the target array's summary: total/chunk bytes, shape, task count and dtype.
y_data

Unnamed: 0,Array,Chunk
Bytes,71.20 kB,71.20 kB
Shape,"(8900, 1)","(8900, 1)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 71.20 kB 71.20 kB Shape (8900, 1) (8900, 1) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",1  8900,

Unnamed: 0,Array,Chunk
Bytes,71.20 kB,71.20 kB
Shape,"(8900, 1)","(8900, 1)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray


In [6]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible across runs.
#
# x_data is stored in 100-row chunks while y_data is a single 8900-row chunk.
# dask-ml splits dask arrays block by block, so the targets are rechunked to
# the same row blocks as the features before splitting; with mismatched
# chunking the rows of x and y may not be paired correctly afterwards.
# NOTE(review): confirm against the installed dask-ml version; the rechunk is
# a no-op for the result if dask-ml already aligns the blocks itself.
x_train, x_test, y_train, y_test = train_test_split(
    x_data,
    y_data.rechunk({0: x_data.chunks[0]}),
    test_size=0.2,
    random_state=3
)

In [None]:
# Featurisation steps, in order:
#   1. reshape each flat sample into a 51 x 51 x 51 volume
#      (51**3 == 132651, the row length of x_data);
#   2. PrimitiveTransformer with 2 states over the range [0.0, 1.0];
#   3. TwoPointCorrelation for the state pair (0, 0), periodic, cutoff 15;
#   4. flatten each sample back to a single row.
# The step names are runtime keys of the Pipeline and are kept verbatim.
# No dimensionality-reduction step is part of this list.
pca_steps = [
    (
        "reshape",
        GenericTransformer(lambda arr: arr.reshape(arr.shape[0], 51, 51, 51)),
    ),
    (
        "discritize",
        PrimitiveTransformer(n_state=2, min_=0.0, max_=1.0),
    ),
    (
        "correlations",
        TwoPointCorrelation(
            periodic_boundary=True,
            cutoff=15,
            correlations=[(0, 0)],
        ),
    ),
    (
        'flatten',
        GenericTransformer(lambda arr: arr.reshape(arr.shape[0], -1)),
    ),
]

# Feature-extraction-only pipeline (wraps the list above directly).
pca_pipeline = Pipeline(pca_steps)

# Full model: the same step objects followed by polynomial feature expansion
# and an ordinary least-squares regressor.
pipeline = Pipeline(
    [
        *pca_steps,
        ('poly', PolynomialFeatures()),
        ('regressor', LinearRegression()),
    ]
)

In [None]:
# twopt_steps = [
#     ("reshape",GenericTransformer(
#             lambda x: x.reshape(x.shape[0], 51, 51, 51)
#         )
#     ),
#     ("discritize",PrimitiveTransformer(n_state=2, min_=0.0, max_=1.0)),
#     ("correlations",TwoPointCorrelation(periodic_boundary=True, cutoff=20, correlations=[(0, 0)]))
    
# ]

# twopt_pipeline = Pipeline(steps=twopt_steps)



In [None]:
# Fit the feature pipeline on the training samples, transform those same
# samples, keep only the first four feature columns and materialise the
# result in memory.
x_trans_train = (
    pca_pipeline
    .fit(x_train)
    .transform(x_train)[:, :4]
    .compute()
)


In [None]:
# Display the computed training features (first four columns of the pipeline output).
x_trans_train

In [7]:
# Same featurisation as a reshape -> discretise -> two-point-correlation ->
# flatten chain, followed by an incremental PCA that keeps 3 components and
# is configured with a batch size of 100.
# Step names are runtime keys of the Pipeline and are kept verbatim.
pcainc_steps = [
    # Flat sample -> 51 x 51 x 51 volume (51**3 == 132651 values per sample).
    (
        "reshape",
        GenericTransformer(lambda vol: vol.reshape(vol.shape[0], 51, 51, 51)),
    ),
    (
        "discritize",
        PrimitiveTransformer(n_state=2, min_=0.0, max_=1.0),
    ),
    (
        "correlations",
        TwoPointCorrelation(
            periodic_boundary=True,
            cutoff=15,
            correlations=[(0, 0)],
        ),
    ),
    # One row per sample again, so the PCA step receives a 2-D array.
    (
        'flatten',
        GenericTransformer(lambda vol: vol.reshape(vol.shape[0], -1)),
    ),
    (
        'pca',
        IncrementalPCA(
            n_components=3,
            svd_solver='full',
            random_state=999,
            batch_size=100,
        ),
    ),
]

pcainc_pipeline = Pipeline(pcainc_steps)

In [8]:
# Fit the incremental-PCA pipeline on the training samples, project those
# same samples onto the fitted components and materialise the result.
x_trans_train_inc = (
    pcainc_pipeline
    .fit(x_train)
    .transform(x_train)
    .compute()
)
