In [1]:
# Backend selection: 'OPENCL' uses pyopencl; any other value takes the PyCUDA path.
Target='CUDA'
if Target =='OPENCL':
    # Qualifier prepended to every constant declaration generated by
    # InitSymbol / InitSymbolArray.
    prefixConstant="__constant"
    import pyopencl as cl
    Platforms=cl.get_platforms()
    if len(Platforms)==0:
        raise SystemError("No OpenCL platforms")
    SelDevice=None
    # Scan every platform instead of hard-coding Platforms[1]: the guard above
    # only ensures at least one platform exists, so indexing [1] raised
    # IndexError on machines that expose a single OpenCL platform.
    for platform in Platforms:
        for device in platform.get_devices():
            print(device.name)
            # Devices are matched by name; the last match wins.
            if 'SUPER' in device.name:
                SelDevice=device
    # Fail here with a clear message rather than later when a context is
    # created from a None device.
    if SelDevice is None:
        raise SystemError("No OpenCL device with 'SUPER' in its name")
else:
    prefixConstant="__constant__"
    import pycuda.driver as cuda
    # NOTE(review): pycuda.autoinit is imported only for its import-time side
    # effect (per PyCUDA docs it creates the CUDA context) -- do not remove.
    import pycuda.autoinit
    from pycuda.compiler import SourceModule
import os

In [2]:
# Type selectors passed as `_gtype` to InitSymbol / InitSymbolArray.
# Those functions test only for G_INT; any other value is rendered as float.
G_FLOAT, G_INT = 0, 1
def InitSymbol(_gtype,values):
    """Render scalar constants as GPU source declarations.

    Parameters
    ----------
    _gtype : G_INT selects ``int`` declarations formatted with ``%i``;
        any other value selects ``float`` declarations formatted with ``%.9g``.
    values : dict mapping symbol name (str) to its numeric value.

    Returns
    -------
    str : one ``<prefixConstant> <type> <name>=<value>;`` line per entry, in the
        dict's iteration order; the empty string when ``values`` is empty.
    """
    if _gtype==G_INT:
        ctype, assignment = " int ", "=%i;\n"
    else:
        ctype, assignment = " float ", "=%.9g;\n"
    return ''.join(
        prefixConstant + ctype + name + assignment % (number)
        for name, number in values.items()
    )

def InitSymbolArray(_gtype,values):
    """Render constant arrays as GPU source declarations.

    Each entry ``name -> sequence`` becomes::

        <prefixConstant> <type> gpu<name>pr[<len>]={
        v0,
        v1,
        ...
        vN};

    Parameters
    ----------
    _gtype : G_INT selects ``int`` arrays formatted with ``%i``;
        any other value selects ``float`` arrays formatted with ``%.9g``.
    values : dict mapping array base name (str) to a sequence of numbers.

    Returns
    -------
    str : the concatenated declarations, in the dict's iteration order; the
        empty string when ``values`` is empty.
    """
    if _gtype==G_INT:
        ctype, fmt = "int", "%i"
    else:
        ctype, fmt = "float", "%.9g"
    res=''
    for k, entries in values.items():
        res+=prefixConstant + " " + ctype + " gpu" +k+ "pr[%i]={\n" %(len(entries))
        res+=',\n'.join(fmt %(v) for v in entries)
        # Always close the initializer. The previous element-by-element loop
        # emitted '};' only after the last element, so an empty sequence left
        # the declaration unterminated and corrupted the rest of the source.
        res+='};\n'
    return res
            

In [3]:
# Integer constants for the generated GPU source. Each entry is emitted as an
# `int` declaration via InitSymbol, in the order listed here.
IntSymbols={
    'N1': 118,
    'N2': 118,
    'N3': 174,
    'Limit_I_low_PML': 11,
    'Limit_J_low_PML': 11,
    'Limit_K_low_PML': 11,
    'Limit_I_up_PML': 106,
    'Limit_J_up_PML': 106,
    'Limit_K_up_PML': 162,
    'SizeCorrI': 94,
    'SizeCorrJ': 94,
    'SizeCorrK': 150,
    'PML_Thickness': 12,
    'NumberSources': 1,
    'LengthSource': 116,
    'ZoneCount': 1,
    'SizePMLxp1': 1117909,
    'SizePMLyp1': 1117909,
    'SizePMLzp1': 1111301,
    'SizePML': 1097377,
    'SizePMLxp1yp1zp1': 1152776,
    'NumberSensors': 14100,
    'TimeSteps': 712,
    'SelRMSorPeak': 1,
    'SelMapsRMSPeak': 1,
    'IndexRMSPeak_ALLV': 0,
    'IndexRMSPeak_Vx': 0,
    'IndexRMSPeak_Vy': 0,
    'IndexRMSPeak_Vz': 0,
    'IndexRMSPeak_Sigmaxx': 0,
    'IndexRMSPeak_Sigmayy': 0,
    'IndexRMSPeak_Sigmazz': 0,
    'IndexRMSPeak_Sigmaxy': 0,
    'IndexRMSPeak_Sigmaxz': 0,
    'IndexRMSPeak_Sigmayz': 0,
    'IndexRMSPeak_Pressure': 0,
    'NumberSelRMSPeakMaps': 1,
    'SelMapsSensors': 14,
    'IndexSensor_ALLV': 0,
    'IndexSensor_Vx': 0,
    'IndexSensor_Vy': 1,
    'IndexSensor_Vz': 2,
    'IndexSensor_Sigmaxx': 0,
    'IndexSensor_Sigmayy': 0,
    'IndexSensor_Sigmazz': 0,
    'IndexSensor_Sigmaxy': 0,
    'IndexSensor_Sigmaxz': 0,
    'IndexSensor_Sigmayz': 0,
    'IndexSensor_Pressure': 0,
    'NumberSelSensorMaps': 3,
    'SensorSubSampling': 2,
    'SensorStart': 1500,
}

# Scalar float constants for the generated GPU source (emitted via InitSymbol).
FloatSymbols=dict(DT=1e-7)

# Float tables for the generated GPU source. InitSymbolArray emits each one as
# a `float gpu<name>pr[...]` array, in the order listed here.
FloatSymbolsArrays={
    'InvDXDTplus': [
        8.32309439e-08,
        8.5521549e-08,
        8.77259154e-08,
        8.98206025e-08,
        9.17814305e-08,
        9.35840774e-08,
        9.52046335e-08,
        9.66203615e-08,
        9.78103927e-08,
        9.87564306e-08,
        9.94434615e-08,
        9.986028e-08,
        1.00000001e-07,
    ],
    'DXDTminus': [
        7985238,
        8307040.5,
        8600860,
        8866696,
        9104550,
        9314421,
        9496310,
        9650215,
        9776138,
        9874077,
        9944034,
        9986009,
        10000000,
    ],
    'InvDXDTplushp': [
        8.43856043e-08,
        8.663595e-08,
        8.87884681e-08,
        9.08192703e-08,
        9.27040418e-08,
        9.44185743e-08,
        9.59394626e-08,
        9.72448007e-08,
        9.83149349e-08,
        9.91331177e-08,
        9.96861829e-08,
        9.99650354e-08,
        1.00000001e-07,
    ],
    'DXDTminushp': [
        8149637,
        8457448,
        8737276,
        8989121,
        9212984,
        9408863,
        9576760,
        9716674,
        9828605,
        9912554,
        9968519,
        9996502,
        10000000,
    ],
}


# Assemble the GPU source, in order: backend define, the mexType define, the
# indexing header, the generated constant declarations, and the kernel header.
# The type selectors use the named G_INT / G_FLOAT constants instead of the
# bare literals 1 / 0, so these calls stay in sync with InitSymbol's own
# `_gtype==G_INT` test.
BUFFER_FOR_GPU_CODE=''.join([
    '\n#define OPENCL\n' if Target == 'OPENCL' else '\n#define CUDA\n',
    '\n#define mexType float\n',
    '#include "Indexing.h"\n',
    InitSymbol(G_INT,IntSymbols),
    InitSymbol(G_FLOAT,FloatSymbols),
    InitSymbolArray(G_FLOAT,FloatSymbolsArrays),
    '#include "GPU_KERNELS.h"\n',
])


In [4]:
%time
if Target=='OPENCL':
    ctx = cl.Context([SelDevice])
    queue = cl.CommandQueue(ctx)
    prg = cl.Program(ctx, BUFFER_FOR_GPU_CODE).build()
else:
    BUFFER_FOR_GPU_CODE=BUFFER_FOR_GPU_CODE.replace('\t',' ')
    prg =SourceModule(BUFFER_FOR_GPU_CODE,include_dirs=[os.getcwd()])

Wall time: 0 ns
